diff options
Diffstat (limited to 'net')
193 files changed, 5513 insertions, 5195 deletions
diff --git a/net/atm/clip.c b/net/atm/clip.c index f271a7bcf5b2..65f706e4344c 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -617,7 +617,7 @@ static void atmarpd_close(struct atm_vcc *vcc) module_put(THIS_MODULE); } -static struct atmdev_ops atmarpd_dev_ops = { +static const struct atmdev_ops atmarpd_dev_ops = { .close = atmarpd_close }; diff --git a/net/atm/lec.c b/net/atm/lec.c index 093fe8707731..a3d93a1bb133 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -486,7 +486,7 @@ static void lec_atm_close(struct atm_vcc *vcc) module_put(THIS_MODULE); } -static struct atmdev_ops lecdev_ops = { +static const struct atmdev_ops lecdev_ops = { .close = lec_atm_close, .send = lec_atm_send }; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 680a4b9095a1..5677147209e8 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -779,7 +779,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) netif_rx(new_skb); } -static struct atmdev_ops mpc_ops = { /* only send is required */ +static const struct atmdev_ops mpc_ops = { /* only send is required */ .close = mpoad_close, .send = msg_from_mpoad }; diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 983c3a21a133..0a20f6e953ac 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -217,7 +217,7 @@ static void sigd_close(struct atm_vcc *vcc) read_unlock(&vcc_sklist_lock); } -static struct atmdev_ops sigd_dev_ops = { +static const struct atmdev_ops sigd_dev_ops = { .close = sigd_close, .send = sigd_send }; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a3501173e200..83ba5483455a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -729,11 +729,9 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, const unsigned char *packet_buff, int packet_len, bool direct_link) { - unsigned char *skb_buff; unsigned long new_direct_link_flag; - skb_buff = skb_put_data(forw_packet_aggr->skb, packet_buff, - packet_len); + skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len); forw_packet_aggr->packet_len += packet_len; forw_packet_aggr->num_packets++; @@ -1281,7 +1279,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, batadv_ogm_packet->tq = combined_tq; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", + "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", orig_node->orig, orig_neigh_node->orig, total_count, neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty, batadv_ogm_packet->tq, if_incoming->net_dev->name, diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 1e3dc374bfde..8be61734fc43 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -137,7 +137,7 @@ static void batadv_v_ogm_send(struct work_struct *work) struct batadv_priv *bat_priv; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; - unsigned char *ogm_buff, *pkt_buff; + unsigned char *ogm_buff; int ogm_buff_len; u16 tvlv_len = 0; int ret; @@ -166,7 +166,7 @@ static void batadv_v_ogm_send(struct work_struct *work) goto reschedule; skb_reserve(skb, ETH_HLEN); - pkt_buff = skb_put_data(skb, ogm_buff, ogm_buff_len); + skb_put_data(skb, ogm_buff, ogm_buff_len); ogm_packet = (struct batadv_ogm2_packet *)skb->data; ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno)); @@ -200,7 +200,7 @@ static void batadv_v_ogm_send(struct work_struct *work) type = "unknown"; } - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n", + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselves on %s suppressed: %s\n", hard_iface->net_dev->name, type); batadv_hardif_put(hard_iface); @@ -683,18 +683,18 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, ogm_throughput = ntohl(ogm_packet->throughput); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, troughput %u, TTL %u, V %u, tvlv_len %u)\n", + "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, throughput %u, TTL %u, V %u, tvlv_len %u)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, ogm_packet->orig, ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl, ogm_packet->version, ntohs(ogm_packet->tvlv_len)); - /* If the troughput metric is 0, immediately drop the packet. No need to - * create orig_node / neigh_node for an unusable route. + /* If the throughput metric is 0, immediately drop the packet. No need + * to create orig_node / neigh_node for an unusable route. */ if (ogm_throughput == 0) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: originator packet with troughput metric of 0\n"); + "Drop packet: originator packet with throughput metric of 0\n"); return; } @@ -762,7 +762,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, type = "unknown"; } - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n", + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s suppressed: %s\n", ogm_packet->orig, hard_iface->net_dev->name, type); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 6930d6b50f99..b6cfa78e9381 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -834,7 +834,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) last_seen_msecs = last_seen_msecs % 60000; last_seen_secs = last_seen_msecs / 1000; - seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n", + seq_printf(seq, " * %15pI4 %pM %4i %6i:%02i\n", &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(dat_entry->vid), last_seen_mins, last_seen_secs); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 2be8f1f46529..05cc7637c064 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.2" +#define BATADV_SOURCE_VERSION "2017.3" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index d239a9d72ac3..054a65e6eb68 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -911,7 +911,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) type = "unknown"; } - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n", + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n", bcast_packet->orig, hard_iface->net_dev->name, type); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index ab3b654b05cc..4e2576fc0c59 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -273,9 +273,6 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, struct lowpan_peer *peer) { const u8 *saddr; - struct lowpan_btle_dev *dev; - - dev = lowpan_btle_dev(netdev); saddr = peer->lladdr; @@ -618,12 +615,8 @@ static void ifup(struct net_device *netdev) static void ifdown(struct net_device *netdev) { - int err; - rtnl_lock(); - err = dev_close(netdev); - if (err < 0) - BT_INFO("iface %s cannot be closed (%d)", netdev->name, err); + dev_close(netdev); rtnl_unlock(); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a5e4a736a984..a79b648aac88 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -169,29 +169,11 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) } } -static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) -{ - struct switchdev_obj_port_fdb fdb = { - .obj = { - .orig_dev = f->dst->dev, - .id = SWITCHDEV_OBJ_ID_PORT_FDB, - .flags = SWITCHDEV_F_DEFER, - }, - .vid = f->vlan_id, - }; - - ether_addr_copy(fdb.addr, f->addr.addr); - switchdev_port_obj_del(f->dst->dev, &fdb.obj); -} - static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) { if (f->is_static) fdb_del_hw_addr(br, f->addr.addr); - if (f->added_by_external_learn) - fdb_del_external_learn(f); - hlist_del_init_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index a0b11e7d67d9..ca01def49af0 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -713,9 +713,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void br_mdb_init(void) { - rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); - rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); - rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); } void br_mdb_uninit(void) diff --git a/net/can/gw.c b/net/can/gw.c index 29748d844c3f..73a02af4b5d7 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1031,15 +1031,15 @@ static __init int cgw_module_init(void) notifier.notifier_call = cgw_notifier; register_netdevice_notifier(¬ifier); - if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) { + if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) { unregister_netdevice_notifier(¬ifier); kmem_cache_destroy(cgw_cache); return -ENOBUFS; } /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL); - __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL); + __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0); + __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0); return 0; } diff --git a/net/core/Makefile b/net/core/Makefile index 79f9479e9658..56d771a887b6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,9 +9,9 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o tso.o sock_reuseport.o + sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ + fib_notifier.o -obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o diff --git a/net/core/datagram.c b/net/core/datagram.c index ee5647bd91b3..2f3277945d35 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -573,27 +573,12 @@ fault: } EXPORT_SYMBOL(skb_copy_datagram_from_iter); -/** - * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter - * @skb: buffer to copy - * @from: the source to copy from - * - * The function will first copy up to headlen, and then pin the userspace - * pages and build frags through them. - * - * Returns 0, -EFAULT or -EMSGSIZE. - */ -int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length) { - int len = iov_iter_count(from); - int copy = min_t(int, skb_headlen(skb), len); - int frag = 0; + int frag = skb_shinfo(skb)->nr_frags; - /* copy up to skb headlen */ - if (skb_copy_datagram_from_iter(skb, 0, from, copy)) - return -EFAULT; - - while (iov_iter_count(from)) { + while (length && iov_iter_count(from)) { struct page *pages[MAX_SKB_FRAGS]; size_t start; ssize_t copied; @@ -603,18 +588,24 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; - copied = iov_iter_get_pages(from, pages, ~0U, + copied = iov_iter_get_pages(from, pages, length, MAX_SKB_FRAGS - frag, &start); if (copied < 0) return -EFAULT; iov_iter_advance(from, copied); + length -= copied; truesize = PAGE_ALIGN(copied + start); skb->data_len += copied; skb->len += copied; skb->truesize += truesize; - refcount_add(truesize, &skb->sk->sk_wmem_alloc); + if (sk && sk->sk_type == SOCK_STREAM) { + sk->sk_wmem_queued += truesize; + sk_mem_charge(sk, truesize); + } else { + refcount_add(truesize, &skb->sk->sk_wmem_alloc); + } while (copied) { int size = min_t(int, copied, PAGE_SIZE - start); skb_fill_page_desc(skb, frag++, pages[n], start, size); @@ -625,6 +616,28 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) } return 0; } +EXPORT_SYMBOL(__zerocopy_sg_from_iter); + +/** + * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter + * @skb: buffer to copy + * @from: the source to copy from + * + * The function will first copy up to headlen, and then pin the userspace + * pages and build frags through them. + * + * Returns 0, -EFAULT or -EMSGSIZE. + */ +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) +{ + int copy = min_t(int, skb_headlen(skb), iov_iter_count(from)); + + /* copy up to skb headlen */ + if (skb_copy_datagram_from_iter(skb, 0, from, copy)) + return -EFAULT; + + return __zerocopy_sg_from_iter(NULL, skb, from, ~0U); +} EXPORT_SYMBOL(zerocopy_sg_from_iter); static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, diff --git a/net/core/dev.c b/net/core/dev.c index ce15a06d5558..3f69f6e71824 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -144,6 +144,7 @@ #include <linux/netfilter_ingress.h> #include <linux/crash_dump.h> #include <linux/sctp.h> +#include <net/udp_tunnel.h> #include "net-sysfs.h" @@ -1413,7 +1414,7 @@ int dev_open(struct net_device *dev) } EXPORT_SYMBOL(dev_open); -static int __dev_close_many(struct list_head *head) +static void __dev_close_many(struct list_head *head) { struct net_device *dev; @@ -1455,23 +1456,18 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; netpoll_poll_enable(dev); } - - return 0; } -static int __dev_close(struct net_device *dev) +static void __dev_close(struct net_device *dev) { - int retval; LIST_HEAD(single); list_add(&dev->close_list, &single); - retval = __dev_close_many(&single); + __dev_close_many(&single); list_del(&single); - - return retval; } -int dev_close_many(struct list_head *head, bool unlink) +void dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1488,8 +1484,6 @@ int dev_close_many(struct list_head *head, bool unlink) if (unlink) list_del_init(&dev->close_list); } - - return 0; } EXPORT_SYMBOL(dev_close_many); @@ -1502,7 +1496,7 @@ EXPORT_SYMBOL(dev_close_many); * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier * chain. */ -int dev_close(struct net_device *dev) +void dev_close(struct net_device *dev) { if (dev->flags & IFF_UP) { LIST_HEAD(single); @@ -1511,7 +1505,6 @@ int dev_close(struct net_device *dev) dev_close_many(&single, true); list_del(&single); } - return 0; } EXPORT_SYMBOL(dev_close); @@ -1860,7 +1853,7 @@ static inline int deliver_skb(struct sk_buff *skb, struct packet_type *pt_prev, struct net_device *orig_dev) { - if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) return -ENOMEM; refcount_inc(&skb->users); return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); @@ -3865,6 +3858,121 @@ drop: return NET_RX_DROP; } +static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct xdp_buff xdp; + u32 act = XDP_DROP; + void *orig_data; + int hlen, off; + u32 mac_len; + + /* Reinjected packets coming from act_mirred or similar should + * not get XDP generic processing. + */ + if (skb_cloned(skb)) + return XDP_PASS; + + if (skb_linearize(skb)) + goto do_drop; + + /* The XDP program wants to see the packet starting at the MAC + * header. + */ + mac_len = skb->data - skb_mac_header(skb); + hlen = skb_headlen(skb) + mac_len; + xdp.data = skb->data - mac_len; + xdp.data_end = xdp.data + hlen; + xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + off = xdp.data - orig_data; + if (off > 0) + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); + + switch (act) { + case XDP_REDIRECT: + case XDP_TX: + __skb_push(skb, mac_len); + /* fall through */ + case XDP_PASS: + break; + + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(skb->dev, xdp_prog, act); + /* fall through */ + case XDP_DROP: + do_drop: + kfree_skb(skb); + break; + } + + return act; +} + +/* When doing generic XDP we have to bypass the qdisc layer and the + * network taps in order to match in-driver-XDP behavior. + */ +static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + txq = netdev_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + if (free_skb) { + trace_xdp_exception(dev, xdp_prog, XDP_TX); + kfree_skb(skb); + } +} + +static struct static_key generic_xdp_needed __read_mostly; + +static int do_xdp_generic(struct sk_buff *skb) +{ + struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); + + if (xdp_prog) { + u32 act = netif_receive_generic_xdp(skb, xdp_prog); + int err; + + if (act != XDP_PASS) { + switch (act) { + case XDP_REDIRECT: + err = xdp_do_generic_redirect(skb->dev, skb); + if (err) + goto out_redir; + /* fallthru to submit skb */ + case XDP_TX: + generic_xdp_tx(skb, xdp_prog); + break; + } + return XDP_DROP; + } + } + return XDP_PASS; +out_redir: + trace_xdp_exception(skb->dev, xdp_prog, XDP_REDIRECT); + kfree_skb(skb); + return XDP_DROP; +} + static int netif_rx_internal(struct sk_buff *skb) { int ret; @@ -3872,6 +3980,18 @@ static int netif_rx_internal(struct sk_buff *skb) net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); + + if (static_key_false(&generic_xdp_needed)) { + int ret = do_xdp_generic(skb); + + /* Consider XDP consuming the packet a success from + * the netdev point of view we do not want to count + * this as an error. + */ + if (ret != XDP_PASS) + return NET_RX_SUCCESS; + } + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -4292,7 +4412,7 @@ skip_classify: } if (pt_prev) { - if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) goto drop; else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); @@ -4338,8 +4458,6 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -static struct static_key generic_xdp_needed __read_mostly; - static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) { struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); @@ -4373,89 +4491,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) return ret; } -static u32 netif_receive_generic_xdp(struct sk_buff *skb, - struct bpf_prog *xdp_prog) -{ - struct xdp_buff xdp; - u32 act = XDP_DROP; - void *orig_data; - int hlen, off; - u32 mac_len; - - /* Reinjected packets coming from act_mirred or similar should - * not get XDP generic processing. - */ - if (skb_cloned(skb)) - return XDP_PASS; - - if (skb_linearize(skb)) - goto do_drop; - - /* The XDP program wants to see the packet starting at the MAC - * header. - */ - mac_len = skb->data - skb_mac_header(skb); - hlen = skb_headlen(skb) + mac_len; - xdp.data = skb->data - mac_len; - xdp.data_end = xdp.data + hlen; - xdp.data_hard_start = skb->data - skb_headroom(skb); - orig_data = xdp.data; - - act = bpf_prog_run_xdp(xdp_prog, &xdp); - - off = xdp.data - orig_data; - if (off > 0) - __skb_pull(skb, off); - else if (off < 0) - __skb_push(skb, -off); - - switch (act) { - case XDP_TX: - __skb_push(skb, mac_len); - /* fall through */ - case XDP_PASS: - break; - - default: - bpf_warn_invalid_xdp_action(act); - /* fall through */ - case XDP_ABORTED: - trace_xdp_exception(skb->dev, xdp_prog, act); - /* fall through */ - case XDP_DROP: - do_drop: - kfree_skb(skb); - break; - } - - return act; -} - -/* When doing generic XDP we have to bypass the qdisc layer and the - * network taps in order to match in-driver-XDP behavior. - */ -static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) -{ - struct net_device *dev = skb->dev; - struct netdev_queue *txq; - bool free_skb = true; - int cpu, rc; - - txq = netdev_pick_tx(dev, skb, NULL); - cpu = smp_processor_id(); - HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { - rc = netdev_start_xmit(skb, dev, txq, 0); - if (dev_xmit_complete(rc)) - free_skb = false; - } - HARD_TX_UNLOCK(dev, txq); - if (free_skb) { - trace_xdp_exception(dev, xdp_prog, XDP_TX); - kfree_skb(skb); - } -} - static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; @@ -4468,17 +4503,11 @@ static int netif_receive_skb_internal(struct sk_buff *skb) rcu_read_lock(); if (static_key_false(&generic_xdp_needed)) { - struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); + int ret = do_xdp_generic(skb); - if (xdp_prog) { - u32 act = netif_receive_generic_xdp(skb, xdp_prog); - - if (act != XDP_PASS) { - rcu_read_unlock(); - if (act == XDP_TX) - generic_xdp_tx(skb, xdp_prog); - return NET_RX_DROP; - } + if (ret != XDP_PASS) { + rcu_read_unlock(); + return NET_RX_DROP; } } @@ -6689,8 +6718,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) */ ret = 0; - if ((old_flags ^ flags) & IFF_UP) - ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); + if ((old_flags ^ flags) & IFF_UP) { + if (old_flags & IFF_UP) + __dev_close(dev); + else + ret = __dev_open(dev); + } if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; @@ -7235,24 +7268,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_GSO; } - /* UFO needs SG and checksumming */ - if (features & NETIF_F_UFO) { - /* maybe split UFO into V4 and V6? */ - if (!(features & NETIF_F_HW_CSUM) && - ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) != - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) { - netdev_dbg(dev, - "Dropping NETIF_F_UFO since no checksum offload features.\n"); - features &= ~NETIF_F_UFO; - } - - if (!(features & NETIF_F_SG)) { - netdev_dbg(dev, - "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); - features &= ~NETIF_F_UFO; - } - } - /* GSO partial features require GSO partial be set */ if ((features & dev->gso_partial_features) && !(features & NETIF_F_GSO_PARTIAL)) { @@ -7313,8 +7328,27 @@ sync_lower: netdev_for_each_lower_dev(dev, lower, iter) netdev_sync_lower_features(dev, lower, features); - if (!err) + if (!err) { + netdev_features_t diff = features ^ dev->features; + + if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) { + /* udp_tunnel_{get,drop}_rx_info both need + * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the + * device, or they won't do anything. + * Thus we need to update dev->features + * *before* calling udp_tunnel_get_rx_info, + * but *after* calling udp_tunnel_drop_rx_info. + */ + if (features & NETIF_F_RX_UDP_TUNNEL_PORT) { + dev->features = features; + udp_tunnel_get_rx_info(dev); + } else { + udp_tunnel_drop_rx_info(dev); + } + } + dev->features = features; + } return err < 0 ? 0 : 1; } @@ -7516,6 +7550,12 @@ int register_netdevice(struct net_device *dev) */ dev->hw_features |= NETIF_F_SOFT_FEATURES; dev->features |= NETIF_F_SOFT_FEATURES; + + if (dev->netdev_ops->ndo_udp_tunnel_add) { + dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT; + dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT; + } + dev->wanted_features = dev->features & dev->hw_features; if (!(dev->flags & IFF_LOOPBACK)) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 674b6c9cec18..6a582ae4c5d9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_LRO_BIT] = "rx-lro", [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", - [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", @@ -106,6 +105,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_HW_TC_BIT] = "hw-tc-offload", [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", + [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", }; static const char @@ -299,9 +299,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) case ETHTOOL_GTSO: case ETHTOOL_STSO: return NETIF_F_ALL_TSO; - case ETHTOOL_GUFO: - case ETHTOOL_SUFO: - return NETIF_F_UFO; case ETHTOOL_GGSO: case ETHTOOL_SGSO: return NETIF_F_GSO; @@ -2515,6 +2512,33 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM }; + + if (!dev->ethtool_ops->get_fecparam) + return -EOPNOTSUPP; + + dev->ethtool_ops->get_fecparam(dev, &fecparam); + + if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) + return -EFAULT; + return 0; +} + +static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_fecparam fecparam; + + if (!dev->ethtool_ops->set_fecparam) + return -EOPNOTSUPP; + + if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) + return -EFAULT; + + return dev->ethtool_ops->set_fecparam(dev, &fecparam); +} + /* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -2555,7 +2579,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: - case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: @@ -2574,6 +2597,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GTUNABLE: case ETHTOOL_PHY_GTUNABLE: case ETHTOOL_GLINKSETTINGS: + case ETHTOOL_GFECPARAM: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -2723,7 +2747,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCSUM: case ETHTOOL_GSG: case ETHTOOL_GTSO: - case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: rc = ethtool_get_one_feature(dev, useraddr, ethcmd); @@ -2732,7 +2755,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCSUM: case ETHTOOL_SSG: case ETHTOOL_STSO: - case ETHTOOL_SUFO: case ETHTOOL_SGSO: case ETHTOOL_SGRO: rc = ethtool_set_one_feature(dev, useraddr, ethcmd); @@ -2785,6 +2807,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_PHY_STUNABLE: rc = set_phy_tunable(dev, useraddr); break; + case ETHTOOL_GFECPARAM: + rc = ethtool_get_fecparam(dev, useraddr); + break; + case ETHTOOL_SFECPARAM: + rc = ethtool_set_fecparam(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c new file mode 100644 index 000000000000..292aab83702f --- /dev/null +++ b/net/core/fib_notifier.c @@ -0,0 +1,164 @@ +#include <linux/rtnetlink.h> +#include <linux/notifier.h> +#include <linux/rcupdate.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <net/net_namespace.h> +#include <net/fib_notifier.h> + +static ATOMIC_NOTIFIER_HEAD(fib_chain); + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return nb->notifier_call(nb, event_type, info); +} +EXPORT_SYMBOL(call_fib_notifier); + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return atomic_notifier_call_chain(&fib_chain, event_type, info); +} +EXPORT_SYMBOL(call_fib_notifiers); + +static unsigned int fib_seq_sum(void) +{ + struct fib_notifier_ops *ops; + unsigned int fib_seq = 0; + struct net *net; + + rtnl_lock(); + for_each_net(net) { + list_for_each_entry(ops, &net->fib_notifier_ops, list) + fib_seq += ops->fib_seq_read(net); + } + rtnl_unlock(); + + return fib_seq; +} + +static int fib_net_dump(struct net *net, struct notifier_block *nb) +{ + struct fib_notifier_ops *ops; + + list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { + int err = ops->fib_dump(net, nb); + + if (err) + return err; + } + + return 0; +} + +static bool fib_dump_is_consistent(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + unsigned int fib_seq) +{ + atomic_notifier_chain_register(&fib_chain, nb); + if (fib_seq == fib_seq_sum()) + return true; + atomic_notifier_chain_unregister(&fib_chain, nb); + if (cb) + cb(nb); + return false; +} + +#define FIB_DUMP_MAX_RETRIES 5 +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)) +{ + int retries = 0; + int err; + + do { + unsigned int fib_seq = fib_seq_sum(); + struct net *net; + + rcu_read_lock(); + for_each_net_rcu(net) { + err = fib_net_dump(net, nb); + if (err) + goto err_fib_net_dump; + } + rcu_read_unlock(); + + if (fib_dump_is_consistent(nb, cb, fib_seq)) + return 0; + } while (++retries < FIB_DUMP_MAX_RETRIES); + + return -EBUSY; + +err_fib_net_dump: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); + +static int __fib_notifier_ops_register(struct fib_notifier_ops *ops, + struct net *net) +{ + struct fib_notifier_ops *o; + + list_for_each_entry(o, &net->fib_notifier_ops, list) + if (ops->family == o->family) + return -EEXIST; + list_add_tail_rcu(&ops->list, &net->fib_notifier_ops); + return 0; +} + +struct fib_notifier_ops * +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net) +{ + struct fib_notifier_ops *ops; + int err; + + ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); + if (!ops) + return ERR_PTR(-ENOMEM); + + err = __fib_notifier_ops_register(ops, net); + if (err) + goto err_register; + + return ops; + +err_register: + kfree(ops); + return ERR_PTR(err); +} +EXPORT_SYMBOL(fib_notifier_ops_register); + +void fib_notifier_ops_unregister(struct fib_notifier_ops *ops) +{ + list_del_rcu(&ops->list); + kfree_rcu(ops, rcu); +} +EXPORT_SYMBOL(fib_notifier_ops_unregister); + +static int __net_init fib_notifier_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->fib_notifier_ops); + return 0; +} + +static struct pernet_operations fib_notifier_net_ops = { + .init = fib_notifier_net_init, +}; + +static int __init fib_notifier_init(void) +{ + return register_pernet_subsys(&fib_notifier_net_ops); +} + +subsys_initcall(fib_notifier_init); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index fdcb1bcd2afa..9a6d97c1d810 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -299,6 +299,67 @@ out: } EXPORT_SYMBOL_GPL(fib_rules_lookup); +static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule, int family) +{ + struct fib_rule_notifier_info info = { + .info.family = family, + .rule = rule, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static int call_fib_rule_notifiers(struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule, + struct fib_rules_ops *ops) +{ + struct fib_rule_notifier_info info = { + .info.family = ops->family, + .rule = rule, + }; + + ops->fib_rules_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + +/* Called with rcu_read_lock() */ +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family) +{ + struct fib_rules_ops *ops; + struct fib_rule *rule; + + ops = lookup_rules_ops(net, family); + if (!ops) + return -EAFNOSUPPORT; + list_for_each_entry_rcu(rule, &ops->rules_list, list) + call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule, + family); + rules_ops_put(ops); + + return 0; +} +EXPORT_SYMBOL_GPL(fib_rules_dump); + +unsigned int fib_rules_seq_read(struct net *net, int family) +{ + unsigned int fib_rules_seq; + struct fib_rules_ops *ops; + + ASSERT_RTNL(); + + ops = lookup_rules_ops(net, family); + if (!ops) + return 0; + fib_rules_seq = ops->fib_rules_seq; + rules_ops_put(ops); + + return fib_rules_seq; +} +EXPORT_SYMBOL_GPL(fib_rules_seq_read); + static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rules_ops *ops) { @@ -548,6 +609,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); @@ -687,6 +749,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } } + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops); notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); @@ -963,9 +1026,9 @@ static struct pernet_operations fib_rules_net_ops = { static int __init fib_rules_init(void) { int err; - rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) diff --git a/net/core/filter.c b/net/core/filter.c index f44fc22fd45a..5afe3ac191ec 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -55,6 +55,7 @@ #include <net/sock_reuseport.h> #include <net/busy_poll.h> #include <net/tcp.h> +#include <linux/bpf_trace.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -513,14 +514,27 @@ do_pass: break; } - /* Convert JEQ into JNE when 'jump_true' is next insn. */ - if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { - insn->code = BPF_JMP | BPF_JNE | bpf_src; + /* Convert some jumps when 'jump_true' is next insn. */ + if (fp->jt == 0) { + switch (BPF_OP(fp->code)) { + case BPF_JEQ: + insn->code = BPF_JMP | BPF_JNE | bpf_src; + break; + case BPF_JGT: + insn->code = BPF_JMP | BPF_JLE | bpf_src; + break; + case BPF_JGE: + insn->code = BPF_JMP | BPF_JLT | bpf_src; + break; + default: + goto jmp_rest; + } + target = i + fp->jf + 1; BPF_EMIT_JMP; break; } - +jmp_rest: /* Other jumps are mapped into two insns: Jxx and JA. */ target = i + fp->jt + 1; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; @@ -1778,6 +1792,8 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { struct redirect_info { u32 ifindex; u32 flags; + struct bpf_map *map; + struct bpf_map *map_to_flush; }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); @@ -1791,6 +1807,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) ri->ifindex = ifindex; ri->flags = flags; + ri->map = NULL; return TC_ACT_REDIRECT; } @@ -1818,6 +1835,29 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + if (unlikely(flags)) + return XDP_ABORTED; + + ri->ifindex = ifindex; + ri->flags = flags; + ri->map = map; + + return XDP_REDIRECT; +} + +static const struct bpf_func_proto bpf_redirect_map_proto = { + .func = bpf_redirect_map, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -2024,8 +2064,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { - /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to - * be changed into SKB_GSO_TCPV6. + /* SKB_GSO_TCPV4 needs to be changed into + * SKB_GSO_TCPV6. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; @@ -2060,8 +2100,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { - /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to - * be changed into SKB_GSO_TCPV4. + /* SKB_GSO_TCPV6 needs to be changed into + * SKB_GSO_TCPV4. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; @@ -2412,6 +2452,142 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +static int __bpf_tx_xdp(struct net_device *dev, + struct bpf_map *map, + struct xdp_buff *xdp, + u32 index) +{ + int err; + + if (!dev->netdev_ops->ndo_xdp_xmit) { + bpf_warn_invalid_xdp_redirect(dev->ifindex); + return -EOPNOTSUPP; + } + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + if (err) + return err; + + if (map) + __dev_map_insert_ctx(map, index); + else + dev->netdev_ops->ndo_xdp_flush(dev); + + return err; +} + +void xdp_do_flush_map(void) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct bpf_map *map = ri->map_to_flush; + + ri->map = NULL; + ri->map_to_flush = NULL; + + if (map) + __dev_map_flush(map); +} +EXPORT_SYMBOL_GPL(xdp_do_flush_map); + +int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct bpf_map *map = ri->map; + u32 index = ri->ifindex; + struct net_device *fwd; + int err = -EINVAL; + + ri->ifindex = 0; + ri->map = NULL; + + fwd = __dev_map_lookup_elem(map, index); + if (!fwd) + goto out; + + if (ri->map_to_flush && (ri->map_to_flush != map)) + xdp_do_flush_map(); + + err = __bpf_tx_xdp(fwd, map, xdp, index); + if (likely(!err)) + ri->map_to_flush = map; + +out: + trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT); + return err; +} + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *fwd; + u32 index = ri->ifindex; + + if (ri->map) + return xdp_do_redirect_map(dev, xdp, xdp_prog); + + fwd = dev_get_by_index_rcu(dev_net(dev), index); + ri->ifindex = 0; + ri->map = NULL; + if (unlikely(!fwd)) { + bpf_warn_invalid_xdp_redirect(index); + return -EINVAL; + } + + trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT); + + return __bpf_tx_xdp(fwd, NULL, xdp, 0); +} +EXPORT_SYMBOL_GPL(xdp_do_redirect); + +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + unsigned int len; + u32 index = ri->ifindex; + + dev = dev_get_by_index_rcu(dev_net(dev), index); + ri->ifindex = 0; + if (unlikely(!dev)) { + bpf_warn_invalid_xdp_redirect(index); + goto err; + } + + if (unlikely(!(dev->flags & IFF_UP))) + goto err; + + len = dev->mtu + dev->hard_header_len + VLAN_HLEN; + if (skb->len > len) + goto err; + + skb->dev = dev; + return 0; +err: + return -EINVAL; +} +EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); + +BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + if (unlikely(flags)) + return XDP_ABORTED; + + ri->ifindex = ifindex; + ri->flags = flags; + return XDP_REDIRECT; +} + +static const struct bpf_func_proto bpf_xdp_redirect_proto = { + .func = bpf_xdp_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || @@ -3011,6 +3187,10 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; + case BPF_FUNC_redirect: + return &bpf_xdp_redirect_proto; + case BPF_FUNC_redirect_map: + return &bpf_redirect_map_proto; default: return bpf_base_func_proto(func_id); } @@ -3310,6 +3490,11 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +void bpf_warn_invalid_xdp_redirect(u32 ifindex) +{ + WARN_ONCE(1, "Illegal XDP redirect to unsupported device ifindex(%i)\n", ifindex); +} + static bool __is_valid_sock_ops_access(int off, int size) { if (off < 0 || off >= sizeof(struct bpf_sock_ops)) diff --git a/net/core/flow.c b/net/core/flow.c deleted file mode 100644 index f7f5d1932a27..000000000000 --- a/net/core/flow.c +++ /dev/null @@ -1,516 +0,0 @@ -/* flow.c: Generic flow cache. - * - * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru) - * Copyright (C) 2003 David S. Miller (davem@redhat.com) - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/list.h> -#include <linux/jhash.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/completion.h> -#include <linux/percpu.h> -#include <linux/bitops.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/cpumask.h> -#include <linux/mutex.h> -#include <net/flow.h> -#include <linux/atomic.h> -#include <linux/security.h> -#include <net/net_namespace.h> - -struct flow_cache_entry { - union { - struct hlist_node hlist; - struct list_head gc_list; - } u; - struct net *net; - u16 family; - u8 dir; - u32 genid; - struct flowi key; - struct flow_cache_object *object; -}; - -struct flow_flush_info { - struct flow_cache *cache; - atomic_t cpuleft; - struct completion completion; -}; - -static struct kmem_cache *flow_cachep __read_mostly; - -#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift) -#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) - -static void flow_cache_new_hashrnd(unsigned long arg) -{ - struct flow_cache *fc = (void *) arg; - int i; - - for_each_possible_cpu(i) - per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; - - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); -} - -static int flow_entry_valid(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) - return 0; - if (fle->object && !fle->object->ops->check(fle->object)) - return 0; - return 1; -} - -static void flow_entry_kill(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (fle->object) - fle->object->ops->delete(fle->object); - kmem_cache_free(flow_cachep, fle); -} - -static void flow_cache_gc_task(struct work_struct *work) -{ - struct list_head gc_list; - struct flow_cache_entry *fce, *n; - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_gc_work); - - INIT_LIST_HEAD(&gc_list); - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { - flow_entry_kill(fce, xfrm); - atomic_dec(&xfrm->flow_cache_gc_count); - } -} - -static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - unsigned int deleted, - struct list_head *gc_list, - struct netns_xfrm *xfrm) -{ - if (deleted) { - atomic_add(deleted, &xfrm->flow_cache_gc_count); - fcp->hash_count -= deleted; - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - schedule_work(&xfrm->flow_cache_gc_work); - } -} - -static void __flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - unsigned int shrink_to) -{ - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - for (i = 0; i < flow_cache_hash_size(fc); i++) { - unsigned int saved = 0; - - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (saved < shrink_to && - flow_entry_valid(fle, xfrm)) { - saved++; - } else { - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); -} - -static void flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); - - __flow_cache_shrink(fc, fcp, shrink_to); -} - -static void flow_new_hash_rnd(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - get_random_bytes(&fcp->hash_rnd, sizeof(u32)); - fcp->hash_rnd_recalc = 0; - __flow_cache_shrink(fc, fcp, 0); -} - -static u32 flow_hash_code(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - const struct flowi *key, - unsigned int keysize) -{ - const u32 *k = (const u32 *) key; - const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - - return jhash2(k, length, fcp->hash_rnd) - & (flow_cache_hash_size(fc) - 1); -} - -/* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment. - */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, - unsigned int keysize) -{ - const flow_compare_t *k1, *k1_lim, *k2; - - k1 = (const flow_compare_t *) key1; - k1_lim = k1 + keysize; - - k2 = (const flow_compare_t *) key2; - - do { - if (*k1++ != *k2++) - return 1; - } while (k1 < k1_lim); - - return 0; -} - -struct flow_cache_object * -flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver, void *ctx) -{ - struct flow_cache *fc = &net->xfrm.flow_cache_global; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle, *tfle; - struct flow_cache_object *flo; - unsigned int keysize; - unsigned int hash; - - local_bh_disable(); - fcp = this_cpu_ptr(fc->percpu); - - fle = NULL; - flo = NULL; - - keysize = flow_key_size(family); - if (!keysize) - goto nocache; - - /* Packet really early in init? Making flow_cache_init a - * pre-smp initcall would solve this. --RR */ - if (!fcp->hash_table) - goto nocache; - - if (fcp->hash_rnd_recalc) - flow_new_hash_rnd(fc, fcp); - - hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { - if (tfle->net == net && - tfle->family == family && - tfle->dir == dir && - flow_key_compare(key, &tfle->key, keysize) == 0) { - fle = tfle; - break; - } - } - - if (unlikely(!fle)) { - if (fcp->hash_count > fc->high_watermark) - flow_cache_shrink(fc, fcp); - - if (atomic_read(&net->xfrm.flow_cache_gc_count) > - 2 * num_online_cpus() * fc->high_watermark) { - flo = ERR_PTR(-ENOBUFS); - goto ret_object; - } - - fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); - if (fle) { - fle->net = net; - fle->family = family; - fle->dir = dir; - memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); - fle->object = NULL; - hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); - fcp->hash_count++; - } - } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { - flo = fle->object; - if (!flo) - goto ret_object; - flo = flo->ops->get(flo); - if (flo) - goto ret_object; - } else if (fle->object) { - flo = fle->object; - flo->ops->delete(flo); - fle->object = NULL; - } - -nocache: - flo = NULL; - if (fle) { - flo = fle->object; - fle->object = NULL; - } - flo = resolver(net, key, family, dir, flo, ctx); - if (fle) { - fle->genid = atomic_read(&net->xfrm.flow_cache_genid); - if (!IS_ERR(flo)) - fle->object = flo; - else - fle->genid--; - } else { - if (!IS_ERR_OR_NULL(flo)) - flo->ops->delete(flo); - } -ret_object: - local_bh_enable(); - return flo; -} -EXPORT_SYMBOL(flow_cache_lookup); - -static void flow_cache_flush_tasklet(unsigned long data) -{ - struct flow_flush_info *info = (void *)data; - struct flow_cache *fc = info->cache; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - fcp = this_cpu_ptr(fc->percpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (flow_entry_valid(fle, xfrm)) - continue; - - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); - - if (atomic_dec_and_test(&info->cpuleft)) - complete(&info->completion); -} - -/* - * Return whether a cpu needs flushing. Conservatively, we assume - * the presence of any entries means the core may require flushing, - * since the flow_cache_ops.check() function may assume it's running - * on the same core as the per-cpu cache component. - */ -static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp; - unsigned int i; - - fcp = per_cpu_ptr(fc->percpu, cpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) - if (!hlist_empty(&fcp->hash_table[i])) - return 0; - return 1; -} - -static void flow_cache_flush_per_cpu(void *data) -{ - struct flow_flush_info *info = data; - struct tasklet_struct *tasklet; - - tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; - tasklet->data = (unsigned long)info; - tasklet_schedule(tasklet); -} - -void flow_cache_flush(struct net *net) -{ - struct flow_flush_info info; - cpumask_var_t mask; - int i, self; - - /* Track which cpus need flushing to avoid disturbing all cores. */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return; - cpumask_clear(mask); - - /* Don't want cpus going down or up during this. */ - get_online_cpus(); - mutex_lock(&net->xfrm.flow_flush_sem); - info.cache = &net->xfrm.flow_cache_global; - for_each_online_cpu(i) - if (!flow_cache_percpu_empty(info.cache, i)) - cpumask_set_cpu(i, mask); - atomic_set(&info.cpuleft, cpumask_weight(mask)); - if (atomic_read(&info.cpuleft) == 0) - goto done; - - init_completion(&info.completion); - - local_bh_disable(); - self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); - on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); - if (self) - flow_cache_flush_tasklet((unsigned long)&info); - local_bh_enable(); - - wait_for_completion(&info.completion); - -done: - mutex_unlock(&net->xfrm.flow_flush_sem); - put_online_cpus(); - free_cpumask_var(mask); -} - -static void flow_cache_flush_task(struct work_struct *work) -{ - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_flush_work); - struct net *net = container_of(xfrm, struct net, xfrm); - - flow_cache_flush(net); -} - -void flow_cache_flush_deferred(struct net *net) -{ - schedule_work(&net->xfrm.flow_cache_flush_work); -} - -static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); - - if (!fcp->hash_table) { - fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); - if (!fcp->hash_table) { - pr_err("NET: failed to allocate flow cache sz %u\n", sz); - return -ENOMEM; - } - fcp->hash_rnd_recalc = 1; - fcp->hash_count = 0; - tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); - } - return 0; -} - -static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - - return flow_cache_cpu_prepare(fc, cpu); -} - -static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - - __flow_cache_shrink(fc, fcp, 0); - return 0; -} - -int flow_cache_init(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - if (!flow_cachep) - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, NULL); - spin_lock_init(&net->xfrm.flow_cache_gc_lock); - INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); - INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); - INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); - mutex_init(&net->xfrm.flow_flush_sem); - atomic_set(&net->xfrm.flow_cache_gc_count, 0); - - fc->hash_shift = 10; - fc->low_watermark = 2 * flow_cache_hash_size(fc); - fc->high_watermark = 4 * flow_cache_hash_size(fc); - - fc->percpu = alloc_percpu(struct flow_cache_percpu); - if (!fc->percpu) - return -ENOMEM; - - if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node)) - goto err; - - setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, - (unsigned long) fc); - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); - - return 0; - -err: - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; - - return -ENOMEM; -} -EXPORT_SYMBOL(flow_cache_init); - -void flow_cache_fini(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - del_timer_sync(&fc->rnd_timer); - - cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node); - - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; -} -EXPORT_SYMBOL(flow_cache_fini); - -void __init flow_cache_hp_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE, - "net/flow:prepare", - flow_cache_cpu_up_prep, - flow_cache_cpu_dead); - WARN_ON(ret < 0); -} diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index fc5fc4594c90..79b9c06c83ad 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -4,6 +4,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/if_vlan.h> +#include <net/dsa.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/gre.h> @@ -440,6 +441,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb, skb->vlan_proto : skb->protocol; nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); +#if IS_ENABLED(CONFIG_NET_DSA) + if (unlikely(netdev_uses_dsa(skb->dev))) { + const struct dsa_device_ops *ops; + int offset; + + ops = skb->dev->dsa_ptr->tag_ops; + if (ops->flow_dissect && + !ops->flow_dissect(skb, &proto, &offset)) { + hlen -= offset; + nhoff += offset; + } + } +#endif } /* It is ensured by skb_flow_dissector_init() that control key will @@ -998,51 +1012,6 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) } EXPORT_SYMBOL(skb_get_hash_perturb); -__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) -{ - struct flow_keys keys; - - memset(&keys, 0, sizeof(keys)); - - memcpy(&keys.addrs.v6addrs.src, &fl6->saddr, - sizeof(keys.addrs.v6addrs.src)); - memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr, - sizeof(keys.addrs.v6addrs.dst)); - keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - keys.ports.src = fl6->fl6_sport; - keys.ports.dst = fl6->fl6_dport; - keys.keyid.keyid = fl6->fl6_gre_key; - keys.tags.flow_label = (__force u32)fl6->flowlabel; - keys.basic.ip_proto = fl6->flowi6_proto; - - __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), - flow_keys_have_l4(&keys)); - - return skb->hash; -} -EXPORT_SYMBOL(__skb_get_hash_flowi6); - -__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4) -{ - struct flow_keys keys; - - memset(&keys, 0, sizeof(keys)); - - keys.addrs.v4addrs.src = fl4->saddr; - keys.addrs.v4addrs.dst = fl4->daddr; - keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - keys.ports.src = fl4->fl4_sport; - keys.ports.dst = fl4->fl4_dport; - keys.keyid.keyid = fl4->fl4_gre_key; - keys.basic.ip_proto = fl4->flowi4_proto; - - __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), - flow_keys_have_l4(&keys)); - - return skb->hash; -} -EXPORT_SYMBOL(__skb_get_hash_flowi4); - u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index d9cb3532f1dd..0b171756453c 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -44,6 +44,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "SEG6"; case LWTUNNEL_ENCAP_BPF: return "BPF"; + case LWTUNNEL_ENCAP_SEG6_LOCAL: + return "SEG6LOCAL"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: @@ -65,7 +67,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) return lws; } -EXPORT_SYMBOL(lwtunnel_state_alloc); +EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); static const struct lwtunnel_encap_ops __rcu * lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; @@ -80,7 +82,7 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, &lwtun_encaps[num], NULL, ops) ? 0 : -1; } -EXPORT_SYMBOL(lwtunnel_encap_add_ops); +EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, unsigned int encap_type) @@ -99,7 +101,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, return ret; } -EXPORT_SYMBOL(lwtunnel_encap_del_ops); +EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, @@ -138,7 +140,7 @@ int lwtunnel_build_state(u16 encap_type, return ret; } -EXPORT_SYMBOL(lwtunnel_build_state); +EXPORT_SYMBOL_GPL(lwtunnel_build_state); int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) { @@ -175,7 +177,7 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) return ret; } -EXPORT_SYMBOL(lwtunnel_valid_encap_type); +EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, struct netlink_ext_ack *extack) @@ -205,7 +207,7 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, return 0; } -EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); +EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); void lwtstate_free(struct lwtunnel_state *lws) { @@ -219,7 +221,7 @@ void lwtstate_free(struct lwtunnel_state *lws) } module_put(ops->owner); } -EXPORT_SYMBOL(lwtstate_free); +EXPORT_SYMBOL_GPL(lwtstate_free); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { @@ -259,7 +261,7 @@ nla_put_failure: return (ret == -EOPNOTSUPP ? 0 : ret); } -EXPORT_SYMBOL(lwtunnel_fill_encap); +EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) { @@ -281,7 +283,7 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) return ret; } -EXPORT_SYMBOL(lwtunnel_get_encap_size); +EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { @@ -309,7 +311,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) return ret; } -EXPORT_SYMBOL(lwtunnel_cmp_encap); +EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -343,7 +345,7 @@ drop: return ret; } -EXPORT_SYMBOL(lwtunnel_output); +EXPORT_SYMBOL_GPL(lwtunnel_output); int lwtunnel_xmit(struct sk_buff *skb) { @@ -378,7 +380,7 @@ drop: return ret; } -EXPORT_SYMBOL(lwtunnel_xmit); +EXPORT_SYMBOL_GPL(lwtunnel_xmit); int lwtunnel_input(struct sk_buff *skb) { @@ -412,4 +414,4 @@ drop: return ret; } -EXPORT_SYMBOL(lwtunnel_input); +EXPORT_SYMBOL_GPL(lwtunnel_input); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d0713627deb6..16a1a4c4eb57 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3261,13 +3261,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister); static int __init neigh_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0); rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, - NULL); - rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL); + 0); + rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); return 0; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 8726d051f31d..6cfdc7c84c48 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -855,9 +855,10 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); - rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, + RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, - NULL); + RTNL_FLAG_DOIT_UNLOCKED); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9201e3621351..9e9f1419be60 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -62,7 +62,7 @@ struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; - rtnl_calcit_func calcit; + unsigned int flags; }; static DEFINE_MUTEX(rtnl_mutex); @@ -127,7 +127,8 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; +static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; +static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -143,58 +144,13 @@ static inline int rtm_msgindex(int msgtype) return msgindex; } -static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) -{ - struct rtnl_link *tab; - - if (protocol <= RTNL_FAMILY_MAX) - tab = rtnl_msg_handlers[protocol]; - else - tab = NULL; - - if (tab == NULL || tab[msgindex].doit == NULL) - tab = rtnl_msg_handlers[PF_UNSPEC]; - - return tab[msgindex].doit; -} - -static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) -{ - struct rtnl_link *tab; - - if (protocol <= RTNL_FAMILY_MAX) - tab = rtnl_msg_handlers[protocol]; - else - tab = NULL; - - if (tab == NULL || tab[msgindex].dumpit == NULL) - tab = rtnl_msg_handlers[PF_UNSPEC]; - - return tab[msgindex].dumpit; -} - -static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) -{ - struct rtnl_link *tab; - - if (protocol <= RTNL_FAMILY_MAX) - tab = rtnl_msg_handlers[protocol]; - else - tab = NULL; - - if (tab == NULL || tab[msgindex].calcit == NULL) - tab = rtnl_msg_handlers[PF_UNSPEC]; - - return tab[msgindex].calcit; -} - /** * __rtnl_register - Register a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @calcit: Function pointer to calc size of dump message + * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the @@ -208,7 +164,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) */ int __rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, - rtnl_calcit_func calcit) + unsigned int flags) { struct rtnl_link *tab; int msgindex; @@ -216,23 +172,20 @@ int __rtnl_register(int protocol, int msgtype, BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); - tab = rtnl_msg_handlers[protocol]; + tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]); if (tab == NULL) { tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL); if (tab == NULL) return -ENOBUFS; - rtnl_msg_handlers[protocol] = tab; + rcu_assign_pointer(rtnl_msg_handlers[protocol], tab); } if (doit) tab[msgindex].doit = doit; - if (dumpit) tab[msgindex].dumpit = dumpit; - - if (calcit) - tab[msgindex].calcit = calcit; + tab[msgindex].flags |= flags; return 0; } @@ -249,9 +202,9 @@ EXPORT_SYMBOL_GPL(__rtnl_register); */ void rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, - rtnl_calcit_func calcit) + unsigned int flags) { - if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0) + if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0) panic("Unable to register rtnetlink message handler, " "protocol = %d, message type = %d\n", protocol, msgtype); @@ -267,17 +220,23 @@ EXPORT_SYMBOL_GPL(rtnl_register); */ int rtnl_unregister(int protocol, int msgtype) { + struct rtnl_link *handlers; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); - if (rtnl_msg_handlers[protocol] == NULL) + rtnl_lock(); + handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); + if (!handlers) { + rtnl_unlock(); return -ENOENT; + } - rtnl_msg_handlers[protocol][msgindex].doit = NULL; - rtnl_msg_handlers[protocol][msgindex].dumpit = NULL; - rtnl_msg_handlers[protocol][msgindex].calcit = NULL; + handlers[msgindex].doit = NULL; + handlers[msgindex].dumpit = NULL; + handlers[msgindex].flags = 0; + rtnl_unlock(); return 0; } @@ -292,10 +251,20 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); */ void rtnl_unregister_all(int protocol) { + struct rtnl_link *handlers; + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); - kfree(rtnl_msg_handlers[protocol]); - rtnl_msg_handlers[protocol] = NULL; + rtnl_lock(); + handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); + RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); + rtnl_unlock(); + + synchronize_net(); + + while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1) + schedule(); + kfree(handlers); } EXPORT_SYMBOL_GPL(rtnl_unregister_all); @@ -433,16 +402,24 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) { struct net_device *master_dev; const struct rtnl_link_ops *ops; + size_t size = 0; - master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + rcu_read_lock(); + + master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev); if (!master_dev) - return 0; + goto out; + ops = master_dev->rtnl_link_ops; if (!ops || !ops->get_slave_size) - return 0; + goto out; /* IFLA_INFO_SLAVE_DATA + nested data */ - return nla_total_size(sizeof(struct nlattr)) + + size = nla_total_size(sizeof(struct nlattr)) + ops->get_slave_size(master_dev, dev); + +out: + rcu_read_unlock(); + return size; } static size_t rtnl_link_get_size(const struct net_device *dev) @@ -2831,11 +2808,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) * traverse the list of net devices and compute the minimum * buffer size based upon the filter mask. */ - list_for_each_entry(dev, &net->dev_base_head, dev_list) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, if_nlmsg_size(dev, ext_filter_mask)); } + rcu_read_unlock(); return nlmsg_total_size(min_ifinfo_dump_size); } @@ -2847,19 +2826,29 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (s_idx == 0) s_idx = 1; + for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { int type = cb->nlh->nlmsg_type-RTM_BASE; + struct rtnl_link *handlers; + rtnl_dumpit_func dumpit; + if (idx < s_idx || idx == PF_PACKET) continue; - if (rtnl_msg_handlers[idx] == NULL || - rtnl_msg_handlers[idx][type].dumpit == NULL) + + handlers = rtnl_dereference(rtnl_msg_handlers[idx]); + if (!handlers) + continue; + + dumpit = READ_ONCE(handlers[type].dumpit); + if (!dumpit) continue; + if (idx > s_idx) { memset(&cb->args[0], 0, sizeof(cb->args)); cb->prev_seq = 0; cb->seq = 0; } - if (rtnl_msg_handlers[idx][type].dumpit(skb, cb)) + if (dumpit(skb, cb)) break; } cb->family = idx; @@ -4162,11 +4151,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct rtnl_link *handlers; + int err = -EOPNOTSUPP; rtnl_doit_func doit; + unsigned int flags; int kind; int family; int type; - int err; type = nlh->nlmsg_type; if (type > RTM_MAX) @@ -4184,20 +4175,40 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; + if (family >= ARRAY_SIZE(rtnl_msg_handlers)) + family = PF_UNSPEC; + + rcu_read_lock(); + handlers = rcu_dereference(rtnl_msg_handlers[family]); + if (!handlers) { + family = PF_UNSPEC; + handlers = rcu_dereference(rtnl_msg_handlers[family]); + } + if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; - rtnl_calcit_func calcit; u16 min_dump_alloc = 0; - dumpit = rtnl_get_dumpit(family, type); - if (dumpit == NULL) - return -EOPNOTSUPP; - calcit = rtnl_get_calcit(family, type); - if (calcit) - min_dump_alloc = calcit(skb, nlh); + dumpit = READ_ONCE(handlers[type].dumpit); + if (!dumpit) { + family = PF_UNSPEC; + handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]); + if (!handlers) + goto err_unlock; + + dumpit = READ_ONCE(handlers[type].dumpit); + if (!dumpit) + goto err_unlock; + } + + refcount_inc(&rtnl_msg_handlers_ref[family]); + + if (type == RTM_GETLINK - RTM_BASE) + min_dump_alloc = rtnl_calcit(skb, nlh); + + rcu_read_unlock(); - __rtnl_unlock(); rtnl = net->rtnl; { struct netlink_dump_control c = { @@ -4206,22 +4217,47 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, }; err = netlink_dump_start(rtnl, skb, nlh, &c); } - rtnl_lock(); + refcount_dec(&rtnl_msg_handlers_ref[family]); return err; } - doit = rtnl_get_doit(family, type); - if (doit == NULL) - return -EOPNOTSUPP; + doit = READ_ONCE(handlers[type].doit); + if (!doit) { + family = PF_UNSPEC; + handlers = rcu_dereference(rtnl_msg_handlers[family]); + } + + flags = READ_ONCE(handlers[type].flags); + if (flags & RTNL_FLAG_DOIT_UNLOCKED) { + refcount_inc(&rtnl_msg_handlers_ref[family]); + doit = READ_ONCE(handlers[type].doit); + rcu_read_unlock(); + if (doit) + err = doit(skb, nlh, extack); + refcount_dec(&rtnl_msg_handlers_ref[family]); + return err; + } + + rcu_read_unlock(); + + rtnl_lock(); + handlers = rtnl_dereference(rtnl_msg_handlers[family]); + if (handlers) { + doit = READ_ONCE(handlers[type].doit); + if (doit) + err = doit(skb, nlh, extack); + } + rtnl_unlock(); + return err; - return doit(skb, nlh, extack); +err_unlock: + rcu_read_unlock(); + return -EOPNOTSUPP; } static void rtnetlink_rcv(struct sk_buff *skb) { - rtnl_lock(); netlink_rcv_skb(skb, &rtnetlink_rcv_msg); - rtnl_unlock(); } static int rtnetlink_bind(struct net *net, int group) @@ -4294,29 +4330,34 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { + int i; + + for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++) + refcount_set(&rtnl_msg_handlers_ref[i], 1); + if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, - rtnl_dump_ifinfo, rtnl_calcit); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL); + rtnl_dump_ifinfo, 0); + rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL); + rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0); + rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0); + rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0); - rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); - rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); - rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); + rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0); - rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); - rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); - rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, - NULL); + 0); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f990eb8b30a9..cb123590c674 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -158,31 +158,6 @@ out: * */ -struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) -{ - struct sk_buff *skb; - - /* Get the HEAD */ - skb = kmem_cache_alloc_node(skbuff_head_cache, - gfp_mask & ~__GFP_DMA, node); - if (!skb) - goto out; - - /* - * Only clear those fields we need to clear, not those that we will - * actually initialise below. Hence, don't put any more fields after - * the tail pointer in struct sk_buff! - */ - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->head = NULL; - skb->truesize = sizeof(struct sk_buff); - refcount_set(&skb->users, 1); - - skb->mac_header = (typeof(skb->mac_header))~0U; -out: - return skb; -} - /** * __alloc_skb - allocate a network buffer * @size: size to allocate @@ -592,21 +567,10 @@ static void skb_release_data(struct sk_buff *skb) for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i]); - /* - * If skb buf is from userspace, we need to notify the caller - * the lower device DMA has done; - */ - if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) { - struct ubuf_info *uarg; - - uarg = shinfo->destructor_arg; - if (uarg->callback) - uarg->callback(uarg, true); - } - if (shinfo->frag_list) kfree_skb_list(shinfo->frag_list); + skb_zcopy_clear(skb, true); skb_free_head(skb); } @@ -720,14 +684,7 @@ EXPORT_SYMBOL(kfree_skb_list); */ void skb_tx_error(struct sk_buff *skb) { - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { - struct ubuf_info *uarg; - - uarg = skb_shinfo(skb)->destructor_arg; - if (uarg->callback) - uarg->callback(uarg, false); - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; - } + skb_zcopy_clear(skb, true); } EXPORT_SYMBOL(skb_tx_error); @@ -762,8 +719,7 @@ void consume_stateless_skb(struct sk_buff *skb) return; trace_consume_skb(skb); - if (likely(skb->head)) - skb_release_data(skb); + skb_release_data(skb); kfree_skbmem(skb); } @@ -941,6 +897,273 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); +static int mm_account_pinned_pages(struct mmpin *mmp, size_t size) +{ + unsigned long max_pg, num_pg, new_pg, old_pg; + struct user_struct *user; + + if (capable(CAP_IPC_LOCK) || !size) + return 0; + + num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */ + max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + user = mmp->user ? : current_user(); + + do { + old_pg = atomic_long_read(&user->locked_vm); + new_pg = old_pg + num_pg; + if (new_pg > max_pg) + return -ENOBUFS; + } while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) != + old_pg); + + if (!mmp->user) { + mmp->user = get_uid(user); + mmp->num_pg = num_pg; + } else { + mmp->num_pg += num_pg; + } + + return 0; +} + +static void mm_unaccount_pinned_pages(struct mmpin *mmp) +{ + if (mmp->user) { + atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm); + free_uid(mmp->user); + } +} + +struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size) +{ + struct ubuf_info *uarg; + struct sk_buff *skb; + + WARN_ON_ONCE(!in_task()); + + if (!sock_flag(sk, SOCK_ZEROCOPY)) + return NULL; + + skb = sock_omalloc(sk, 0, GFP_KERNEL); + if (!skb) + return NULL; + + BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb)); + uarg = (void *)skb->cb; + uarg->mmp.user = NULL; + + if (mm_account_pinned_pages(&uarg->mmp, size)) { + kfree_skb(skb); + return NULL; + } + + uarg->callback = sock_zerocopy_callback; + uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; + uarg->len = 1; + uarg->bytelen = size; + uarg->zerocopy = 1; + atomic_set(&uarg->refcnt, 0); + sock_hold(sk); + + return uarg; +} +EXPORT_SYMBOL_GPL(sock_zerocopy_alloc); + +static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) +{ + return container_of((void *)uarg, struct sk_buff, cb); +} + +struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg) +{ + if (uarg) { + const u32 byte_limit = 1 << 19; /* limit to a few TSO */ + u32 bytelen, next; + + /* realloc only when socket is locked (TCP, UDP cork), + * so uarg->len and sk_zckey access is serialized + */ + if (!sock_owned_by_user(sk)) { + WARN_ON_ONCE(1); + return NULL; + } + + bytelen = uarg->bytelen + size; + if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) { + /* TCP can create new skb to attach new uarg */ + if (sk->sk_type == SOCK_STREAM) + goto new_alloc; + return NULL; + } + + next = (u32)atomic_read(&sk->sk_zckey); + if ((u32)(uarg->id + uarg->len) == next) { + if (mm_account_pinned_pages(&uarg->mmp, size)) + return NULL; + uarg->len++; + uarg->bytelen = bytelen; + atomic_set(&sk->sk_zckey, ++next); + return uarg; + } + } + +new_alloc: + return sock_zerocopy_alloc(sk, size); +} +EXPORT_SYMBOL_GPL(sock_zerocopy_realloc); + +static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) +{ + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + u32 old_lo, old_hi; + u64 sum_len; + + old_lo = serr->ee.ee_info; + old_hi = serr->ee.ee_data; + sum_len = old_hi - old_lo + 1ULL + len; + + if (sum_len >= (1ULL << 32)) + return false; + + if (lo != old_hi + 1) + return false; + + serr->ee.ee_data += len; + return true; +} + +void sock_zerocopy_callback(struct ubuf_info *uarg, bool success) +{ + struct sk_buff *tail, *skb = skb_from_uarg(uarg); + struct sock_exterr_skb *serr; + struct sock *sk = skb->sk; + struct sk_buff_head *q; + unsigned long flags; + u32 lo, hi; + u16 len; + + mm_unaccount_pinned_pages(&uarg->mmp); + + /* if !len, there was only 1 call, and it was aborted + * so do not queue a completion notification + */ + if (!uarg->len || sock_flag(sk, SOCK_DEAD)) + goto release; + + len = uarg->len; + lo = uarg->id; + hi = uarg->id + len - 1; + + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = 0; + serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; + serr->ee.ee_data = hi; + serr->ee.ee_info = lo; + if (!success) + serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; + + q = &sk->sk_error_queue; + spin_lock_irqsave(&q->lock, flags); + tail = skb_peek_tail(q); + if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY || + !skb_zerocopy_notify_extend(tail, lo, len)) { + __skb_queue_tail(q, skb); + skb = NULL; + } + spin_unlock_irqrestore(&q->lock, flags); + + sk->sk_error_report(sk); + +release: + consume_skb(skb); + sock_put(sk); +} +EXPORT_SYMBOL_GPL(sock_zerocopy_callback); + +void sock_zerocopy_put(struct ubuf_info *uarg) +{ + if (uarg && atomic_dec_and_test(&uarg->refcnt)) { + if (uarg->callback) + uarg->callback(uarg, uarg->zerocopy); + else + consume_skb(skb_from_uarg(uarg)); + } +} +EXPORT_SYMBOL_GPL(sock_zerocopy_put); + +void sock_zerocopy_put_abort(struct ubuf_info *uarg) +{ + if (uarg) { + struct sock *sk = skb_from_uarg(uarg)->sk; + + atomic_dec(&sk->sk_zckey); + uarg->len--; + + /* sock_zerocopy_put expects a ref. Most sockets take one per + * skb, which is zero on abort. tcp_sendmsg holds one extra, to + * avoid an skb send inside the main loop triggering uarg free. + */ + if (sk->sk_type != SOCK_STREAM) + atomic_inc(&uarg->refcnt); + + sock_zerocopy_put(uarg); + } +} +EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort); + +extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg) +{ + struct ubuf_info *orig_uarg = skb_zcopy(skb); + struct iov_iter orig_iter = msg->msg_iter; + int err, orig_len = skb->len; + + /* An skb can only point to one uarg. This edge case happens when + * TCP appends to an skb, but zerocopy_realloc triggered a new alloc. + */ + if (orig_uarg && uarg != orig_uarg) + return -EEXIST; + + err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len); + if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { + /* Streams do not free skb on error. Reset to prev state. */ + msg->msg_iter = orig_iter; + ___pskb_trim(skb, orig_len); + return err; + } + + skb_zcopy_set(skb, uarg); + return skb->len - orig_len; +} +EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); + +static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig, + gfp_t gfp_mask) +{ + if (skb_zcopy(orig)) { + if (skb_zcopy(nskb)) { + /* !gfp_mask callers are verified to !skb_zcopy(nskb) */ + if (!gfp_mask) { + WARN_ON_ONCE(1); + return -ENOMEM; + } + if (skb_uarg(nskb) == skb_uarg(orig)) + return 0; + if (skb_copy_ubufs(nskb, GFP_ATOMIC)) + return -EIO; + } + skb_zcopy_set(nskb, skb_uarg(orig)); + } + return 0; +} + /** * skb_copy_ubufs - copy userspace skb frags buffers to kernel * @skb: the skb to modify @@ -958,15 +1181,19 @@ EXPORT_SYMBOL_GPL(skb_morph); */ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { - int i; int num_frags = skb_shinfo(skb)->nr_frags; struct page *page, *head = NULL; - struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg; + int i, new_frags; + u32 d_off; - for (i = 0; i < num_frags; i++) { - u8 *vaddr; - skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + if (!num_frags) + return 0; + if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) + return -EINVAL; + + new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < new_frags; i++) { page = alloc_page(gfp_mask); if (!page) { while (head) { @@ -976,28 +1203,51 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) } return -ENOMEM; } - vaddr = kmap_atomic(skb_frag_page(f)); - memcpy(page_address(page), - vaddr + f->page_offset, skb_frag_size(f)); - kunmap_atomic(vaddr); set_page_private(page, (unsigned long)head); head = page; } + page = head; + d_off = 0; + for (i = 0; i < num_frags; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + u32 p_off, p_len, copied; + struct page *p; + u8 *vaddr; + + skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f), + p, p_off, p_len, copied) { + u32 copy, done = 0; + vaddr = kmap_atomic(p); + + while (done < p_len) { + if (d_off == PAGE_SIZE) { + d_off = 0; + page = (struct page *)page_private(page); + } + copy = min_t(u32, PAGE_SIZE - d_off, p_len - done); + memcpy(page_address(page) + d_off, + vaddr + p_off + done, copy); + done += copy; + d_off += copy; + } + kunmap_atomic(vaddr); + } + } + /* skb frags release userspace buffers */ for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); - uarg->callback(uarg, false); - /* skb frags point to kernel buffers */ - for (i = num_frags - 1; i >= 0; i--) { - __skb_fill_page_desc(skb, i, head, 0, - skb_shinfo(skb)->frags[i].size); + for (i = 0; i < new_frags - 1; i++) { + __skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE); head = (struct page *)page_private(head); } + __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); + skb_shinfo(skb)->nr_frags = new_frags; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + skb_zcopy_clear(skb, false); return 0; } EXPORT_SYMBOL_GPL(skb_copy_ubufs); @@ -1158,7 +1408,8 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, if (skb_shinfo(skb)->nr_frags) { int i; - if (skb_orphan_frags(skb, gfp_mask)) { + if (skb_orphan_frags(skb, gfp_mask) || + skb_zerocopy_clone(n, skb, gfp_mask)) { kfree_skb(n); n = NULL; goto out; @@ -1235,9 +1486,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, * be since all we did is relocate the values */ if (skb_cloned(skb)) { - /* copy this zero copy skb frags */ if (skb_orphan_frags(skb, gfp_mask)) goto nofrags; + if (skb_zcopy(skb)) + atomic_inc(&skb_uarg(skb)->refcnt); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); @@ -1719,6 +1971,8 @@ pull_pages: if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + if (!i) + goto end; eat = 0; } k++; @@ -1726,9 +1980,13 @@ pull_pages: } skb_shinfo(skb)->nr_frags = k; +end: skb->tail += delta; skb->data_len -= delta; + if (!skb->data_len) + skb_zcopy_clear(skb, false); + return skb_tail_pointer(skb); } EXPORT_SYMBOL(__pskb_pull_tail); @@ -1776,16 +2034,20 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) end = start + skb_frag_size(f); if ((copy = end - offset) > 0) { + u32 p_off, p_len, copied; + struct page *p; u8 *vaddr; if (copy > len) copy = len; - vaddr = kmap_atomic(skb_frag_page(f)); - memcpy(to, - vaddr + f->page_offset + offset - start, - copy); - kunmap_atomic(vaddr); + skb_frag_foreach_page(f, + f->page_offset + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + memcpy(to + copied, vaddr + p_off, p_len); + kunmap_atomic(vaddr); + } if ((len -= copy) == 0) return 0; @@ -2005,6 +2267,107 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, } EXPORT_SYMBOL_GPL(skb_splice_bits); +/* Send skb data on a socket. Socket must be locked. */ +int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, + int len) +{ + unsigned int orig_len = len; + struct sk_buff *head = skb; + unsigned short fragidx; + int slen, ret; + +do_frag_list: + + /* Deal with head data */ + while (offset < skb_headlen(skb) && len) { + struct kvec kv; + struct msghdr msg; + + slen = min_t(int, len, skb_headlen(skb) - offset); + kv.iov_base = skb->data + offset; + kv.iov_len = len; + memset(&msg, 0, sizeof(msg)); + + ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen); + if (ret <= 0) + goto error; + + offset += ret; + len -= ret; + } + + /* All the data was skb head? */ + if (!len) + goto out; + + /* Make offset relative to start of frags */ + offset -= skb_headlen(skb); + + /* Find where we are in frag list */ + for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; + + if (offset < frag->size) + break; + + offset -= frag->size; + } + + for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; + + slen = min_t(size_t, len, frag->size - offset); + + while (slen) { + ret = kernel_sendpage_locked(sk, frag->page.p, + frag->page_offset + offset, + slen, MSG_DONTWAIT); + if (ret <= 0) + goto error; + + len -= ret; + offset += ret; + slen -= ret; + } + + offset = 0; + } + + if (len) { + /* Process any frag lists */ + + if (skb == head) { + if (skb_has_frag_list(skb)) { + skb = skb_shinfo(skb)->frag_list; + goto do_frag_list; + } + } else if (skb->next) { + skb = skb->next; + goto do_frag_list; + } + } + +out: + return orig_len - len; + +error: + return orig_len == len ? ret : orig_len - len; +} +EXPORT_SYMBOL_GPL(skb_send_sock_locked); + +/* Send skb data on a socket. */ +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) +{ + int ret = 0; + + lock_sock(sk); + ret = skb_send_sock_locked(sk, skb, offset, len); + release_sock(sk); + + return ret; +} +EXPORT_SYMBOL_GPL(skb_send_sock); + /** * skb_store_bits - store bits from kernel buffer to skb * @skb: destination buffer @@ -2044,15 +2407,20 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { + u32 p_off, p_len, copied; + struct page *p; u8 *vaddr; if (copy > len) copy = len; - vaddr = kmap_atomic(skb_frag_page(frag)); - memcpy(vaddr + frag->page_offset + offset - start, - from, copy); - kunmap_atomic(vaddr); + skb_frag_foreach_page(frag, + frag->page_offset + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + memcpy(vaddr + p_off, from + copied, p_len); + kunmap_atomic(vaddr); + } if ((len -= copy) == 0) return 0; @@ -2117,20 +2485,27 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { + u32 p_off, p_len, copied; + struct page *p; __wsum csum2; u8 *vaddr; if (copy > len) copy = len; - vaddr = kmap_atomic(skb_frag_page(frag)); - csum2 = ops->update(vaddr + frag->page_offset + - offset - start, copy, 0); - kunmap_atomic(vaddr); - csum = ops->combine(csum, csum2, pos, copy); + + skb_frag_foreach_page(frag, + frag->page_offset + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + csum2 = ops->update(vaddr + p_off, p_len, 0); + kunmap_atomic(vaddr); + csum = ops->combine(csum, csum2, pos, p_len); + pos += p_len; + } + if (!(len -= copy)) return csum; offset += copy; - pos += copy; } start = end; } @@ -2203,24 +2578,31 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + u32 p_off, p_len, copied; + struct page *p; __wsum csum2; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; - vaddr = kmap_atomic(skb_frag_page(frag)); - csum2 = csum_partial_copy_nocheck(vaddr + - frag->page_offset + - offset - start, to, - copy, 0); - kunmap_atomic(vaddr); - csum = csum_block_add(csum, csum2, pos); + + skb_frag_foreach_page(frag, + frag->page_offset + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + csum2 = csum_partial_copy_nocheck(vaddr + p_off, + to + copied, + p_len, 0); + kunmap_atomic(vaddr); + csum = csum_block_add(csum, csum2, pos); + pos += p_len; + } + if (!(len -= copy)) return csum; offset += copy; to += copy; - pos += copy; } start = end; } @@ -2360,6 +2742,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) skb_tx_error(from); return -ENOMEM; } + skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { if (!len) @@ -2657,6 +3040,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2700,6 +3084,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) if (skb_headlen(skb)) return 0; + if (skb_zcopy(tgt) || skb_zcopy(skb)) + return 0; todo = shiftlen; from = 0; @@ -3273,6 +3659,8 @@ normal: skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; + if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC)) + goto err; while (pos < offset + len) { if (i >= nfrags) { @@ -4396,6 +4784,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (skb_has_frag_list(to) || skb_has_frag_list(from)) return false; + if (skb_zcopy(to) || skb_zcopy(from)) + return false; if (skb_headlen(from) != 0) { struct page *page; diff --git a/net/core/sock.c b/net/core/sock.c index ac2a404c73eb..9ea988d25b0a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1055,6 +1055,20 @@ set_rcvbuf: if (val == 1) dst_negative_advice(sk); break; + + case SO_ZEROCOPY: + if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + ret = -ENOTSUPP; + else if (sk->sk_protocol != IPPROTO_TCP) + ret = -ENOTSUPP; + else if (sk->sk_state != TCP_CLOSE) + ret = -EBUSY; + else if (val < 0 || val > 1) + ret = -EINVAL; + else + sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -1383,6 +1397,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val64 = sock_gen_cookie(sk); break; + case SO_ZEROCOPY: + v.val = sock_flag(sk, SOCK_ZEROCOPY); + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -1670,6 +1688,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); @@ -1923,6 +1942,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, } EXPORT_SYMBOL(sock_wmalloc); +static void sock_ofree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_omem_alloc); +} + +struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, + gfp_t priority) +{ + struct sk_buff *skb; + + /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ + if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > + sysctl_optmem_max) + return NULL; + + skb = alloc_skb(size, priority); + if (!skb) + return NULL; + + atomic_add(skb->truesize, &sk->sk_omem_alloc); + skb->sk = sk; + skb->destructor = sock_ofree; + return skb; +} + /* * Allocate a memory block from the socket's option memory buffer. */ @@ -2500,6 +2546,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) } EXPORT_SYMBOL(sock_no_sendmsg); +int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(sock_no_sendmsg_locked); + int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) { @@ -2528,6 +2580,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz } EXPORT_SYMBOL(sock_no_sendpage); +ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + ssize_t res; + struct msghdr msg = {.msg_flags = flags}; + struct kvec iov; + char *kaddr = kmap(page); + + iov.iov_base = kaddr + offset; + iov.iov_len = size; + res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size); + kunmap(page); + return res; +} +EXPORT_SYMBOL(sock_no_sendpage_locked); + /* * Default Socket Callbacks */ @@ -2673,6 +2741,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; + atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 733f523707ac..bae7d78aa068 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1938,8 +1938,8 @@ static int __init dcbnl_init(void) { INIT_LIST_HEAD(&dcb_app_list); - rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); return 0; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 1b202f16531f..001c08696334 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) sk = __inet_lookup_established(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, ntohs(dh->dccph_sport), - inet_iif(skb)); + inet_iif(skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; @@ -804,7 +804,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) lookup: sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), - dh->dccph_sport, dh->dccph_dport, &refcounted); + dh->dccph_sport, dh->dccph_dport, 0, &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1b58eac8aad3..47a7b59b355e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -89,7 +89,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, ntohs(dh->dccph_sport), - inet6_iif(skb)); + inet6_iif(skb), 0); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), @@ -687,7 +687,7 @@ static int dccp_v6_rcv(struct sk_buff *skb) lookup: sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport, - inet6_iif(skb), &refcounted); + inet6_iif(skb), 0, &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index fa0110b57ca1..4d339de56862 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1419,9 +1419,9 @@ void __init dn_dev_init(void) dn_dev_devices_on(); - rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL); - rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL); - rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL); + rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0); + rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0); + rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0); proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index f9f6fb3f3c5b..3d37464c8b4a 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -791,8 +791,8 @@ void __init dn_fib_init(void) register_dnaddr_notifier(&dn_fib_dnaddr_notifier); - rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL); - rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL); + rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0); + rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index bcbe548f8854..0bd3afd01dd2 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1922,10 +1922,10 @@ void __init dn_route_init(void) #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_fib_dump, NULL); + dn_fib_dump, 0); #else rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_cache_dump, NULL); + dn_cache_dump, 0); #endif } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 416ac4ef9ba9..99e38af85fc5 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -67,17 +67,17 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { [DSA_TAG_PROTO_NONE] = &none_ops, }; -int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, - struct dsa_port *dport, int port) +int dsa_cpu_dsa_setup(struct dsa_port *port) { - struct device_node *port_dn = dport->dn; + struct device_node *port_dn = port->dn; + struct dsa_switch *ds = port->ds; struct phy_device *phydev; int ret, mode; if (of_phy_is_fixed_link(port_dn)) { ret = of_phy_register_fixed_link(port_dn); if (ret) { - dev_err(dev, "failed to register fixed PHY\n"); + dev_err(ds->dev, "failed to register fixed PHY\n"); return ret; } phydev = of_phy_find_device(port_dn); @@ -90,7 +90,7 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, genphy_config_init(phydev); genphy_read_status(phydev); if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port, phydev); + ds->ops->adjust_link(ds, port->index, phydev); put_device(&phydev->mdio.dev); } @@ -190,6 +190,8 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, { struct dsa_switch_tree *dst = dev->dsa_ptr; struct sk_buff *nskb = NULL; + struct pcpu_sw_netstats *s; + struct dsa_slave_priv *p; if (unlikely(dst == NULL)) { kfree_skb(skb); @@ -207,12 +209,16 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } skb = nskb; + p = netdev_priv(skb->dev); skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; + s = this_cpu_ptr(p->stats64); + u64_stats_update_begin(&s->syncp); + s->rx_packets++; + s->rx_bytes += skb->len; + u64_stats_update_end(&s->syncp); netif_receive_skb(skb); @@ -220,6 +226,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } #ifdef CONFIG_PM_SLEEP +static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) +{ + return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; +} + int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; @@ -271,10 +282,22 @@ static struct packet_type dsa_pack_type __read_mostly = { .func = dsa_switch_rcv, }; +static struct workqueue_struct *dsa_owq; + +bool dsa_schedule_work(struct work_struct *work) +{ + return queue_work(dsa_owq, work); +} + static int __init dsa_init_module(void) { int rc; + dsa_owq = alloc_ordered_workqueue("dsa_ordered", + WQ_MEM_RECLAIM); + if (!dsa_owq) + return -ENOMEM; + rc = dsa_slave_register_notifier(); if (rc) return rc; @@ -294,6 +317,7 @@ static void __exit dsa_cleanup_module(void) dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); dsa_legacy_unregister(); + destroy_workqueue(dsa_owq); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c442051d5a55..cceaa4dd9f53 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -219,7 +219,7 @@ static int dsa_dsa_port_apply(struct dsa_port *port) struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index); + err = dsa_cpu_dsa_setup(port); if (err) { dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", port->index, err); @@ -243,7 +243,7 @@ static int dsa_cpu_port_apply(struct dsa_port *port) struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index); + err = dsa_cpu_dsa_setup(port); if (err) { dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", port->index, err); @@ -275,7 +275,7 @@ static int dsa_user_port_apply(struct dsa_port *port) if (!name) name = "eth%d"; - err = dsa_slave_create(ds, ds->dev, port->index, name); + err = dsa_slave_create(port, name); if (err) { dev_warn(ds->dev, "Failed to create slave %d: %d\n", port->index, err); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 55982cc39b24..9c3eeb72462d 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -43,10 +43,10 @@ struct dsa_notifier_bridge_info { /* DSA_NOTIFIER_FDB_* */ struct dsa_notifier_fdb_info { - const struct switchdev_obj_port_fdb *fdb; - struct switchdev_trans *trans; int sw_index; int port; + const unsigned char *addr; + u16 vid; }; /* DSA_NOTIFIER_MDB_* */ @@ -65,18 +65,13 @@ struct dsa_notifier_vlan_info { int port; }; -struct dsa_device_ops { - struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); - struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev); -}; - struct dsa_slave_priv { /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); + struct pcpu_sw_netstats *stats64; + /* DSA port data, such as switch, port index, etc. */ struct dsa_port *dp; @@ -99,16 +94,23 @@ struct dsa_slave_priv { }; /* dsa.c */ -int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, - struct dsa_port *dport, int port); +int dsa_cpu_dsa_setup(struct dsa_port *port); void dsa_cpu_dsa_destroy(struct dsa_port *dport); const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp); void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp); +bool dsa_schedule_work(struct work_struct *work); /* legacy.c */ int dsa_legacy_register(void); void dsa_legacy_unregister(void); +int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags); +int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid); /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, @@ -120,35 +122,25 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct switchdev_trans *trans); int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, struct switchdev_trans *trans); -int dsa_port_fdb_add(struct dsa_port *dp, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); -int dsa_port_fdb_del(struct dsa_port *dp, - const struct switchdev_obj_port_fdb *fdb); -int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb); +int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); int dsa_port_mdb_add(struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans); int dsa_port_mdb_del(struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); -int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); -int dsa_port_vlan_dump(struct dsa_port *dp, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb); - /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops); -int dsa_slave_create(struct dsa_switch *ds, struct device *parent, - int port, const char *name); +int dsa_slave_create(struct dsa_port *port, const char *name); void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 1d7a3282f2a7..91e6f7981d39 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -78,25 +78,23 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, } /* basic switch operations **************************************************/ -static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev) +static int dsa_cpu_dsa_setups(struct dsa_switch *ds) { - struct dsa_port *dport; int ret, port; for (port = 0; port < ds->num_ports; port++) { if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - dport = &ds->ports[port]; - ret = dsa_cpu_dsa_setup(ds, dev, dport, port); + ret = dsa_cpu_dsa_setup(&ds->ports[port]); if (ret) return ret; } return 0; } -static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master, - struct device *parent) +static int dsa_switch_setup_one(struct dsa_switch *ds, + struct net_device *master) { const struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; @@ -176,7 +174,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master } if (!ds->slave_mii_bus && ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(parent); + ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) return -ENOMEM; dsa_slave_mii_bus_init(ds); @@ -196,14 +194,14 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master if (!(ds->enabled_port_mask & (1 << i))) continue; - ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); + ret = dsa_slave_create(&ds->ports[i], cd->port_names[i]); if (ret < 0) netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", index, i, cd->port_names[i], ret); } /* Perform configuration of the CPU and DSA ports */ - ret = dsa_cpu_dsa_setups(ds, parent); + ret = dsa_cpu_dsa_setups(ds); if (ret < 0) netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", index); @@ -252,7 +250,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master, ds->ops = ops; ds->priv = priv; - ret = dsa_switch_setup_one(ds, master, parent); + ret = dsa_switch_setup_one(ds, master); if (ret) return ERR_PTR(ret); @@ -741,6 +739,28 @@ static int dsa_resume(struct device *d) } #endif +/* legacy way, bypassing the bridge *****************************************/ +int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; + + return dsa_port_fdb_add(dp, addr, vid); +} + +int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; + + return dsa_port_fdb_del(dp, addr, vid); +} + static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); static const struct of_device_id dsa_of_match_table[] = { diff --git a/net/dsa/port.c b/net/dsa/port.c index efc3bce3a89d..659676ba3f8b 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -146,43 +146,33 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); } -int dsa_port_fdb_add(struct dsa_port *dp, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) +int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid) { struct dsa_notifier_fdb_info info = { .sw_index = dp->ds->index, .port = dp->index, - .trans = trans, - .fdb = fdb, + .addr = addr, + .vid = vid, }; return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info); } -int dsa_port_fdb_del(struct dsa_port *dp, - const struct switchdev_obj_port_fdb *fdb) +int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid) { struct dsa_notifier_fdb_info info = { .sw_index = dp->ds->index, .port = dp->index, - .fdb = fdb, + .addr = addr, + .vid = vid, + }; return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); } -int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_switch *ds = dp->ds; - - if (ds->ops->port_fdb_dump) - return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb); - - return -EOPNOTSUPP; -} - int dsa_port_mdb_add(struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans) @@ -209,17 +199,6 @@ int dsa_port_mdb_del(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info); } -int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_switch *ds = dp->ds; - - if (ds->ops->port_mdb_dump) - return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb); - - return -EOPNOTSUPP; -} - int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) @@ -245,15 +224,3 @@ int dsa_port_vlan_del(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); } - -int dsa_port_vlan_dump(struct dsa_port *dp, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_switch *ds = dp->ds; - - if (ds->ops->port_vlan_dump) - return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb); - - return -EOPNOTSUPP; -} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9507bd38cf04..8c79011c5a83 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -199,6 +199,83 @@ out: return 0; } +struct dsa_slave_dump_ctx { + struct net_device *dev; + struct sk_buff *skb; + struct netlink_callback *cb; + int idx; +}; + +static int +dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid, + bool is_static, void *data) +{ + struct dsa_slave_dump_ctx *dump = data; + u32 portid = NETLINK_CB(dump->cb->skb).portid; + u32 seq = dump->cb->nlh->nlmsg_seq; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + if (dump->idx < dump->cb->args[2]) + goto skip; + + nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, + sizeof(*ndm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_SELF; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dump->dev->ifindex; + ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + + if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr)) + goto nla_put_failure; + + if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid)) + goto nla_put_failure; + + nlmsg_end(dump->skb, nlh); + +skip: + dump->idx++; + return 0; + +nla_put_failure: + nlmsg_cancel(dump->skb, nlh); + return -EMSGSIZE; +} + +static int +dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, struct net_device *filter_dev, + int *idx) +{ + struct dsa_slave_dump_ctx dump = { + .dev = dev, + .skb = skb, + .cb = cb, + .idx = *idx, + }; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; + struct dsa_switch *ds = dp->ds; + int err; + + if (!ds->ops->port_fdb_dump) + return -EOPNOTSUPP; + + err = ds->ops->port_fdb_dump(ds, dp->index, + dsa_slave_port_fdb_do_dump, + &dump); + *idx = dump.idx; + return err; +} + static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -250,9 +327,6 @@ static int dsa_slave_port_obj_add(struct net_device *dev, */ switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans); - break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); break; @@ -276,9 +350,6 @@ static int dsa_slave_port_obj_del(struct net_device *dev, int err; switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; @@ -293,32 +364,6 @@ static int dsa_slave_port_obj_del(struct net_device *dev, return err; } -static int dsa_slave_port_obj_dump(struct net_device *dev, - struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; - int err; - - switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb); - break; - case SWITCHDEV_OBJ_ID_PORT_MDB: - err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb); - break; - case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - static int dsa_slave_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { @@ -330,6 +375,9 @@ static int dsa_slave_port_attr_get(struct net_device *dev, attr->u.ppid.id_len = sizeof(ds->index); memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: + attr->u.brport_flags_support = 0; + break; default: return -EOPNOTSUPP; } @@ -352,10 +400,14 @@ static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p, static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); + struct pcpu_sw_netstats *s; struct sk_buff *nskb; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + s = this_cpu_ptr(p->stats64); + u64_stats_update_begin(&s->syncp); + s->tx_packets++; + s->tx_bytes += skb->len; + u64_stats_update_end(&s->syncp); /* Transmit function may have to reallocate the original SKB, * in which case it must have freed it. Only free it here on error. @@ -594,11 +646,26 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; - - data[0] = dev->stats.tx_packets; - data[1] = dev->stats.tx_bytes; - data[2] = dev->stats.rx_packets; - data[3] = dev->stats.rx_bytes; + struct pcpu_sw_netstats *s; + unsigned int start; + int i; + + for_each_possible_cpu(i) { + u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + + s = per_cpu_ptr(p->stats64, i); + do { + start = u64_stats_fetch_begin_irq(&s->syncp); + tx_packets = s->tx_packets; + tx_bytes = s->tx_bytes; + rx_packets = s->rx_packets; + rx_bytes = s->rx_bytes; + } while (u64_stats_fetch_retry_irq(&s->syncp, start)); + data[0] += tx_packets; + data[1] += tx_bytes; + data[2] += rx_packets; + data[3] += rx_bytes; + } if (ds->ops->get_ethtool_stats) ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4); } @@ -648,17 +715,24 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->dp->ds; int ret; - if (!ds->ops->set_eee) + /* Port's PHY and MAC both need to be EEE capable */ + if (!p->phy) + return -ENODEV; + + if (!ds->ops->set_mac_eee) return -EOPNOTSUPP; - ret = ds->ops->set_eee(ds, p->dp->index, p->phy, e); + ret = ds->ops->set_mac_eee(ds, p->dp->index, e); if (ret) return ret; - if (p->phy) - ret = phy_ethtool_set_eee(p->phy, e); + if (e->eee_enabled) { + ret = phy_init_eee(p->phy, 0); + if (ret) + return ret; + } - return ret; + return phy_ethtool_set_eee(p->phy, e); } static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) @@ -667,17 +741,18 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->dp->ds; int ret; - if (!ds->ops->get_eee) + /* Port's PHY and MAC both need to be EEE capable */ + if (!p->phy) + return -ENODEV; + + if (!ds->ops->get_mac_eee) return -EOPNOTSUPP; - ret = ds->ops->get_eee(ds, p->dp->index, e); + ret = ds->ops->get_mac_eee(ds, p->dp->index, e); if (ret) return ret; - if (p->phy) - ret = phy_ethtool_get_eee(p->phy, e); - - return ret; + return phy_ethtool_get_eee(p->phy, e); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -747,12 +822,12 @@ dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p, } static int dsa_slave_add_cls_matchall(struct net_device *dev, - __be16 protocol, struct tc_cls_matchall_offload *cls, bool ingress) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_mall_tc_entry *mall_tc_entry; + __be16 protocol = cls->common.protocol; struct dsa_switch *ds = p->dp->ds; struct net *net = dev_net(dev); struct dsa_slave_priv *to_p; @@ -765,7 +840,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (!ds->ops->port_mirror_add) return err; - if (!tc_single_action(cls->exts)) + if (!tcf_exts_has_one_action(cls->exts)) return err; tcf_exts_to_list(cls->exts, &actions); @@ -836,31 +911,64 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, kfree(mall_tc_entry); } -static int dsa_slave_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 protocol, - struct tc_to_netdev *tc) +static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls) { - bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); + bool ingress = TC_H_MAJ(cls->common.handle) == TC_H_MAJ(TC_H_INGRESS); - if (chain_index) + if (cls->common.chain_index) return -EOPNOTSUPP; - switch (tc->type) { - case TC_SETUP_MATCHALL: - switch (tc->cls_mall->command) { - case TC_CLSMATCHALL_REPLACE: - return dsa_slave_add_cls_matchall(dev, protocol, - tc->cls_mall, - ingress); - case TC_CLSMATCHALL_DESTROY: - dsa_slave_del_cls_matchall(dev, tc->cls_mall); - return 0; - } + switch (cls->command) { + case TC_CLSMATCHALL_REPLACE: + return dsa_slave_add_cls_matchall(dev, cls, ingress); + case TC_CLSMATCHALL_DESTROY: + dsa_slave_del_cls_matchall(dev, cls); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_CLSMATCHALL: + return dsa_slave_setup_tc_cls_matchall(dev, type_data); default: return -EOPNOTSUPP; } } +static void dsa_slave_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct pcpu_sw_netstats *s; + unsigned int start; + int i; + + netdev_stats_to_stats64(stats, &dev->stats); + for_each_possible_cpu(i) { + u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + + s = per_cpu_ptr(p->stats64, i); + do { + start = u64_stats_fetch_begin_irq(&s->syncp); + tx_packets = s->tx_packets; + tx_bytes = s->tx_bytes; + rx_packets = s->rx_packets; + rx_bytes = s->rx_bytes; + } while (u64_stats_fetch_retry_irq(&s->syncp, start)); + + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + } +} + void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) { ops->get_sset_count = dsa_cpu_port_get_sset_count; @@ -921,9 +1029,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_fdb_add = switchdev_port_fdb_add, - .ndo_fdb_del = switchdev_port_fdb_del, - .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_fdb_add = dsa_legacy_fdb_add, + .ndo_fdb_del = dsa_legacy_fdb_del, + .ndo_fdb_dump = dsa_slave_fdb_dump, .ndo_do_ioctl = dsa_slave_ioctl, .ndo_get_iflink = dsa_slave_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -931,11 +1039,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup, .ndo_poll_controller = dsa_slave_poll_controller, #endif - .ndo_bridge_getlink = switchdev_port_bridge_getlink, - .ndo_bridge_setlink = switchdev_port_bridge_setlink, - .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, + .ndo_get_stats64 = dsa_slave_get_stats64, }; static const struct switchdev_ops dsa_slave_switchdev_ops = { @@ -943,7 +1049,6 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = { .switchdev_port_attr_set = dsa_slave_port_attr_set, .switchdev_port_obj_add = dsa_slave_port_obj_add, .switchdev_port_obj_del = dsa_slave_port_obj_del, - .switchdev_port_obj_dump = dsa_slave_port_obj_dump, }; static struct device_type dsa_type = { @@ -1134,9 +1239,9 @@ int dsa_slave_resume(struct net_device *slave_dev) return 0; } -int dsa_slave_create(struct dsa_switch *ds, struct device *parent, - int port, const char *name) +int dsa_slave_create(struct dsa_port *port, const char *name) { + struct dsa_switch *ds = port->ds; struct dsa_switch_tree *dst = ds->dst; struct net_device *master; struct net_device *slave_dev; @@ -1166,12 +1271,17 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, NULL); - SET_NETDEV_DEV(slave_dev, parent); - slave_dev->dev.of_node = ds->ports[port].dn; + SET_NETDEV_DEV(slave_dev, port->ds->dev); + slave_dev->dev.of_node = port->dn; slave_dev->vlan_features = master->vlan_features; p = netdev_priv(slave_dev); - p->dp = &ds->ports[port]; + p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!p->stats64) { + free_netdev(slave_dev); + return -ENOMEM; + } + p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); p->xmit = dst->tag_ops->xmit; @@ -1179,12 +1289,13 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, p->old_link = -1; p->old_duplex = -1; - ds->ports[port].netdev = slave_dev; + port->netdev = slave_dev; ret = register_netdev(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", ret, slave_dev->name); - ds->ports[port].netdev = NULL; + port->netdev = NULL; + free_percpu(p->stats64); free_netdev(slave_dev); return ret; } @@ -1195,6 +1306,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, if (ret) { netdev_err(master, "error %d setting up slave phy\n", ret); unregister_netdev(slave_dev); + free_percpu(p->stats64); free_netdev(slave_dev); return ret; } @@ -1217,6 +1329,7 @@ void dsa_slave_destroy(struct net_device *slave_dev) of_phy_deregister_fixed_link(port_dn); } unregister_netdev(slave_dev); + free_percpu(p->stats64); free_netdev(slave_dev); } @@ -1259,19 +1372,142 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, return NOTIFY_DONE; } +struct dsa_switchdev_event_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + unsigned long event; +}; + +static void dsa_slave_switchdev_event_work(struct work_struct *work) +{ + struct dsa_switchdev_event_work *switchdev_work = + container_of(work, struct dsa_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct switchdev_notifier_fdb_info *fdb_info; + struct dsa_slave_priv *p = netdev_priv(dev); + int err; + + rtnl_lock(); + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; + err = dsa_port_fdb_add(p->dp, fdb_info->addr, fdb_info->vid); + if (err) { + netdev_dbg(dev, "fdb add failed err=%d\n", err); + break; + } + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, + &fdb_info->info); + break; + + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; + err = dsa_port_fdb_del(p->dp, fdb_info->addr, fdb_info->vid); + if (err) { + netdev_dbg(dev, "fdb del failed err=%d\n", err); + dev_close(dev); + } + break; + } + rtnl_unlock(); + + kfree(switchdev_work->fdb_info.addr); + kfree(switchdev_work); + dev_put(dev); +} + +static int +dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work * + switchdev_work, + const struct switchdev_notifier_fdb_info * + fdb_info) +{ + memcpy(&switchdev_work->fdb_info, fdb_info, + sizeof(switchdev_work->fdb_info)); + switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!switchdev_work->fdb_info.addr) + return -ENOMEM; + ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, + fdb_info->addr); + return 0; +} + +/* Called under rcu_read_lock() */ +static int dsa_slave_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct dsa_switchdev_event_work *switchdev_work; + + if (!dsa_slave_dev_check(dev)) + return NOTIFY_DONE; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return NOTIFY_BAD; + + INIT_WORK(&switchdev_work->work, + dsa_slave_switchdev_event_work); + switchdev_work->dev = dev; + switchdev_work->event = event; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ + case SWITCHDEV_FDB_DEL_TO_DEVICE: + if (dsa_slave_switchdev_fdb_work_init(switchdev_work, + ptr)) + goto err_fdb_work_init; + dev_hold(dev); + break; + default: + kfree(switchdev_work); + return NOTIFY_DONE; + } + + dsa_schedule_work(&switchdev_work->work); + return NOTIFY_OK; + +err_fdb_work_init: + kfree(switchdev_work); + return NOTIFY_BAD; +} + static struct notifier_block dsa_slave_nb __read_mostly = { - .notifier_call = dsa_slave_netdevice_event, + .notifier_call = dsa_slave_netdevice_event, +}; + +static struct notifier_block dsa_slave_switchdev_notifier = { + .notifier_call = dsa_slave_switchdev_event, }; int dsa_slave_register_notifier(void) { - return register_netdevice_notifier(&dsa_slave_nb); + int err; + + err = register_netdevice_notifier(&dsa_slave_nb); + if (err) + return err; + + err = register_switchdev_notifier(&dsa_slave_switchdev_notifier); + if (err) + goto err_switchdev_nb; + + return 0; + +err_switchdev_nb: + unregister_netdevice_notifier(&dsa_slave_nb); + return err; } void dsa_slave_unregister_notifier(void) { int err; + err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier); + if (err) + pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err); + err = unregister_netdevice_notifier(&dsa_slave_nb); if (err) pr_err("DSA: failed to unregister slave notifier (%d)\n", err); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 97e2e9c8cf3f..e6c06aa349a6 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -83,30 +83,20 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - const struct switchdev_obj_port_fdb *fdb = info->fdb; - struct switchdev_trans *trans = info->trans; - /* Do not care yet about other switch chips of the fabric */ if (ds->index != info->sw_index) return 0; - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) - return -EOPNOTSUPP; - - return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans); - } - - ds->ops->port_fdb_add(ds, info->port, fdb, trans); + if (!ds->ops->port_fdb_add) + return -EOPNOTSUPP; - return 0; + return ds->ops->port_fdb_add(ds, info->port, info->addr, + info->vid); } static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - const struct switchdev_obj_port_fdb *fdb = info->fdb; - /* Do not care yet about other switch chips of the fabric */ if (ds->index != info->sw_index) return 0; @@ -114,7 +104,8 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return ds->ops->port_fdb_del(ds, info->port, fdb); + return ds->ops->port_fdb_del(ds, info->port, info->addr, + info->vid); } static int dsa_switch_mdb_add(struct dsa_switch *ds, diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 247774d149f9..e23e7635fa00 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -39,7 +39,6 @@ */ #define LAN9303_TAG_LEN 4 -#define LAN9303_MAX_PORTS 3 static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -104,7 +103,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, source_port = ntohs(lan9303_tag[1]) & 0x3; - if (source_port >= LAN9303_MAX_PORTS) { + if (source_port >= ds->num_ports) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); return NULL; } diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 2f32b7ea3365..02163c045a96 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -87,7 +87,17 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } +static int mtk_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = 4; + *proto = ((__be16 *)skb->data)[1]; + + return 0; +} + const struct dsa_device_ops mtk_netdev_ops = { - .xmit = mtk_tag_xmit, - .rcv = mtk_tag_rcv, + .xmit = mtk_tag_xmit, + .rcv = mtk_tag_rcv, + .flow_dissect = mtk_tag_flow_dissect, }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2e548eca3489..d678820e4306 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -944,6 +944,8 @@ const struct proto_ops inet_stream_ops = { .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, + .sendmsg_locked = tcp_sendmsg_locked, + .sendpage_locked = tcp_sendpage_locked, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, @@ -1219,10 +1221,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, gso_partial, encap; + bool fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; - unsigned int offset = 0; struct iphdr *iph; int proto, tot_len; int nhoff; @@ -1257,7 +1258,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); if (!skb->encapsulation || encap) { - udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ @@ -1277,13 +1277,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); - if (udpfrag) { - iph->frag_off = htons(offset >> 3); - if (skb->next) - iph->frag_off |= htons(IP_MF); - offset += skb->len - nhoff - ihl; - tot_len = skb->len - nhoff; - } else if (skb_is_gso(skb)) { + if (skb_is_gso(skb)) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; @@ -1778,6 +1772,11 @@ static const struct net_offload ipip_offload = { }, }; +static int __init ipip_offload_init(void) +{ + return inet_add_offload(&ipip_offload, IPPROTO_IPIP); +} + static int __init ipv4_offload_init(void) { /* @@ -1787,9 +1786,10 @@ static int __init ipv4_offload_init(void) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (tcpv4_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); + if (ipip_offload_init() < 0) + pr_crit("%s: Cannot add IPIP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); - inet_add_offload(&ipip_offload, IPPROTO_IPIP); return 0; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 38d9af9b917c..d7adc0616599 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2491,9 +2491,9 @@ void __init devinet_init(void) rtnl_af_register(&inet_af_ops); - rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); - rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); + rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - inet_netconf_dump_devconf, NULL); + inet_netconf_dump_devconf, 0); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 044d2a159a3c..37819ab4cc74 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1247,22 +1247,28 @@ static int __net_init ip_fib_net_init(struct net *net) int err; size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ; - net->ipv4.fib_seq = 0; + err = fib4_notifier_init(net); + if (err) + return err; /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL); - if (!net->ipv4.fib_table_hash) - return -ENOMEM; + if (!net->ipv4.fib_table_hash) { + err = -ENOMEM; + goto err_table_hash_alloc; + } err = fib4_rules_init(net); if (err < 0) - goto fail; + goto err_rules_init; return 0; -fail: +err_rules_init: kfree(net->ipv4.fib_table_hash); +err_table_hash_alloc: + fib4_notifier_exit(net); return err; } @@ -1292,6 +1298,7 @@ static void ip_fib_net_exit(struct net *net) #endif rtnl_unlock(); kfree(net->ipv4.fib_table_hash); + fib4_notifier_exit(net); } static int __net_init fib_net_init(struct net *net) @@ -1341,7 +1348,7 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0); } diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index e0714d975947..5d7afb145562 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -1,86 +1,71 @@ #include <linux/rtnetlink.h> #include <linux/notifier.h> -#include <linux/rcupdate.h> +#include <linux/socket.h> #include <linux/kernel.h> #include <net/net_namespace.h> +#include <net/fib_notifier.h> #include <net/netns/ipv4.h> #include <net/ip_fib.h> -static ATOMIC_NOTIFIER_HEAD(fib_chain); - -int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info) +int call_fib4_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) { - info->net = net; - return nb->notifier_call(nb, event_type, info); + info->family = AF_INET; + return call_fib_notifier(nb, net, event_type, info); } -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, - struct fib_notifier_info *info) +int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) { + ASSERT_RTNL(); + + info->family = AF_INET; net->ipv4.fib_seq++; - info->net = net; - return atomic_notifier_call_chain(&fib_chain, event_type, info); + return call_fib_notifiers(net, event_type, info); } -static unsigned int fib_seq_sum(void) +static unsigned int fib4_seq_read(struct net *net) { - unsigned int fib_seq = 0; - struct net *net; - - rtnl_lock(); - for_each_net(net) - fib_seq += net->ipv4.fib_seq; - rtnl_unlock(); + ASSERT_RTNL(); - return fib_seq; + return net->ipv4.fib_seq + fib4_rules_seq_read(net); } -static bool fib_dump_is_consistent(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb), - unsigned int fib_seq) +static int fib4_dump(struct net *net, struct notifier_block *nb) { - atomic_notifier_chain_register(&fib_chain, nb); - if (fib_seq == fib_seq_sum()) - return true; - atomic_notifier_chain_unregister(&fib_chain, nb); - if (cb) - cb(nb); - return false; + int err; + + err = fib4_rules_dump(net, nb); + if (err) + return err; + + fib_notify(net, nb); + + return 0; } -#define FIB_DUMP_MAX_RETRIES 5 -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)) -{ - int retries = 0; +static const struct fib_notifier_ops fib4_notifier_ops_template = { + .family = AF_INET, + .fib_seq_read = fib4_seq_read, + .fib_dump = fib4_dump, +}; - do { - unsigned int fib_seq = fib_seq_sum(); - struct net *net; +int __net_init fib4_notifier_init(struct net *net) +{ + struct fib_notifier_ops *ops; - /* Mutex semantics guarantee that every change done to - * FIB tries before we read the change sequence counter - * is now visible to us. - */ - rcu_read_lock(); - for_each_net_rcu(net) { - fib_rules_notify(net, nb); - fib_notify(net, nb); - } - rcu_read_unlock(); + net->ipv4.fib_seq = 0; - if (fib_dump_is_consistent(nb, cb, fib_seq)) - return 0; - } while (++retries < FIB_DUMP_MAX_RETRIES); + ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv4.notifier_ops = ops; - return -EBUSY; + return 0; } -EXPORT_SYMBOL(register_fib_notifier); -int unregister_fib_notifier(struct notifier_block *nb) +void __net_exit fib4_notifier_exit(struct net *net) { - return atomic_notifier_chain_unregister(&fib_chain, nb); + fib_notifier_ops_unregister(net->ipv4.notifier_ops); } -EXPORT_SYMBOL(unregister_fib_notifier); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 778ecf977eb2..35d646a62ad4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL_GPL(fib4_rule_default); +int fib4_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, AF_INET); +} + +unsigned int fib4_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, AF_INET); +} + int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { @@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } -static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_rule *rule) -{ - struct fib_rule_notifier_info info = { - .rule = rule, - }; - - return call_fib_notifier(nb, net, event_type, &info.info); -} - -static int call_fib_rule_notifiers(struct net *net, - enum fib_event_type event_type, - struct fib_rule *rule) -{ - struct fib_rule_notifier_info info = { - .rule = rule, - }; - - return call_fib_notifiers(net, event_type, &info.info); -} - -/* Called with rcu_read_lock() */ -void fib_rules_notify(struct net *net, struct notifier_block *nb) -{ - struct fib_rules_ops *ops = net->ipv4.rules_ops; - struct fib_rule *rule; - - list_for_each_entry_rcu(rule, &ops->rules_list, list) - call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule); -} - static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, @@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule); err = 0; errout: @@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule); errout: return err; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b8d18171cca3..632b454ce77c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -44,6 +44,7 @@ #include <net/netlink.h> #include <net/nexthop.h> #include <net/lwtunnel.h> +#include <net/fib_notifier.h> #include "fib_lookup.h" @@ -1342,6 +1343,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rtm->rtm_flags |= RTNH_F_DEAD; } + if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD) + rtm->rtm_flags |= RTNH_F_OFFLOAD; #ifdef CONFIG_IP_ROUTE_CLASSID if (fi->fib_nh[0].nh_tclassid && nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) @@ -1449,14 +1452,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) break; - return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, - &info.info); + return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type, + &info.info); case FIB_EVENT_NH_DEL: if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) || (fib_nh->nh_flags & RTNH_F_DEAD)) - return call_fib_notifiers(dev_net(fib_nh->nh_dev), - event_type, &info.info); + return call_fib4_notifiers(dev_net(fib_nh->nh_dev), + event_type, &info.info); default: break; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 64668c69dda6..1a6ffb0dab9c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -81,6 +81,7 @@ #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> +#include <net/fib_notifier.h> #include <trace/events/fib.h> #include "fib_lookup.h" @@ -97,7 +98,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, .type = type, .tb_id = tb_id, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib4_notifier(nb, net, event_type, &info.info); } static int call_fib_entry_notifiers(struct net *net, @@ -113,7 +114,7 @@ static int call_fib_entry_notifiers(struct net *net, .type = type, .tb_id = tb_id, }; - return call_fib_notifiers(net, event_type, &info.info); + return call_fib4_notifiers(net, event_type, &info.info); } #define MAX_STAT_DEPTH 32 diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index d5cac99170b1..416bb304a281 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo, gso_partial; + bool need_csum, gso_partial; if (!skb->encapsulation) goto out; @@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); skb->encap_hdr_csum = need_csum; - ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - features &= skb->dev->hw_enc_features; - /* The only checksum offload we care about from here on out is the - * outer one so strip the existing checksum feature flags based - * on the fact that we will be computing our checksum in software. - */ - if (ufo) { - features &= ~NETIF_F_CSUM_MASK; - if (!need_csum) - features |= NETIF_F_HW_CSUM; - } - /* segment inner packet. */ segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c2be26b98b5f..681e33998e03 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) int type = icmp_param->data.icmph.type; int code = icmp_param->data.icmph.code; - if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) + if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb)) return; /* Needed by both icmp_global_allow and icmp_xmit_lock */ @@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) + if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 498706b072fb..9f86b5133605 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2549,7 +2549,8 @@ done: /* * check if a multicast source filter allows delivery for a given <src,dst,intf> */ -int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) +int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, + int dif, int sdif) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; @@ -2564,7 +2565,8 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) rcu_read_lock(); for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && - pmc->multi.imr_ifindex == dif) + (pmc->multi.imr_ifindex == dif || + (sdif && pmc->multi.imr_ifindex == sdif))) break; } ret = inet->mc_all; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2e3389d614d1..597bb4cfe805 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, - const int dif, bool exact_dif) + const int dif, const int sdif, bool exact_dif) { int score = -1; struct inet_sock *inet = inet_sk(sk); @@ -185,9 +185,13 @@ static inline int compute_score(struct sock *sk, struct net *net, score += 4; } if (sk->sk_bound_dev_if || exact_dif) { - if (sk->sk_bound_dev_if != dif) + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + + if (exact_dif && !dev_match) return -1; - score += 4; + if (sk->sk_bound_dev_if && dev_match) + score += 4; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; @@ -208,7 +212,7 @@ struct sock *__inet_lookup_listener(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, - const int dif) + const int dif, const int sdif) { unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; @@ -218,7 +222,8 @@ struct sock *__inet_lookup_listener(struct net *net, u32 phash = 0; sk_for_each_rcu(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif, exact_dif); + score = compute_score(sk, net, hnum, daddr, + dif, sdif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -268,7 +273,7 @@ struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, - const int dif) + const int dif, const int sdif) { INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(sport, hnum); @@ -286,11 +291,12 @@ begin: if (sk->sk_hash != hash) continue; if (likely(INET_MATCH(sk, net, acookie, - saddr, daddr, ports, dif))) { + saddr, daddr, ports, dif, sdif))) { if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET_MATCH(sk, net, acookie, - saddr, daddr, ports, dif))) { + saddr, daddr, ports, + dif, sdif))) { sock_gen_put(sk); goto begin; } @@ -321,9 +327,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __be32 daddr = inet->inet_rcv_saddr; __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; + struct net *net = sock_net(sk); + int sdif = l3mdev_master_ifindex_by_index(net, dif); INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); - struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); @@ -339,7 +346,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, continue; if (likely(INET_MATCH(sk2, net, acookie, - saddr, daddr, ports, dif))) { + saddr, daddr, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index c5a117cc6619..337ad41bb80a 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -33,7 +33,7 @@ * also be removed if the pool is overloaded i.e. if the total amount of * entries is greater-or-equal than the threshold. * - * Node pool is organised as an AVL tree. + * Node pool is organised as an RB tree. * Such an implementation has been chosen not just for fun. It's a way to * prevent easy and efficient DoS attacks by creating hash collisions. A huge * amount of long living nodes in a single hash slot would significantly delay @@ -45,7 +45,7 @@ * AND reference count being 0. * 3. Global variable peer_total is modified under the pool lock. * 4. struct inet_peer fields modification: - * avl_left, avl_right, avl_parent, avl_height: pool lock + * rb_node: pool lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable @@ -53,30 +53,15 @@ static struct kmem_cache *peer_cachep __read_mostly; -static LIST_HEAD(gc_list); -static const int gc_delay = 60 * HZ; -static struct delayed_work gc_work; -static DEFINE_SPINLOCK(gc_lock); - -#define node_height(x) x->avl_height - -#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) -#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node) -static const struct inet_peer peer_fake_node = { - .avl_left = peer_avl_empty_rcu, - .avl_right = peer_avl_empty_rcu, - .avl_height = 0 -}; - void inet_peer_base_init(struct inet_peer_base *bp) { - bp->root = peer_avl_empty_rcu; + bp->rb_root = RB_ROOT; seqlock_init(&bp->lock); bp->total = 0; } EXPORT_SYMBOL_GPL(inet_peer_base_init); -#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ +#define PEER_MAX_GC 32 /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more @@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -static void inetpeer_gc_worker(struct work_struct *work) -{ - struct inet_peer *p, *n, *c; - struct list_head list; - - spin_lock_bh(&gc_lock); - list_replace_init(&gc_list, &list); - spin_unlock_bh(&gc_lock); - - if (list_empty(&list)) - return; - - list_for_each_entry_safe(p, n, &list, gc_list) { - - if (need_resched()) - cond_resched(); - - c = rcu_dereference_protected(p->avl_left, 1); - if (c != peer_avl_empty) { - list_add_tail(&c->gc_list, &list); - p->avl_left = peer_avl_empty_rcu; - } - - c = rcu_dereference_protected(p->avl_right, 1); - if (c != peer_avl_empty) { - list_add_tail(&c->gc_list, &list); - p->avl_right = peer_avl_empty_rcu; - } - - n = list_entry(p->gc_list.next, struct inet_peer, gc_list); - - if (refcount_read(&p->refcnt) == 1) { - list_del(&p->gc_list); - kmem_cache_free(peer_cachep, p); - } - } - - if (list_empty(&list)) - return; - - spin_lock_bh(&gc_lock); - list_splice(&list, &gc_list); - spin_unlock_bh(&gc_lock); - - schedule_delayed_work(&gc_work, gc_delay); -} - /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -153,225 +91,62 @@ void __init inet_initpeers(void) sizeof(struct inet_peer), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - - INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker); } -#define rcu_deref_locked(X, BASE) \ - rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) - -/* - * Called with local BH disabled and the pool lock held. - */ -#define lookup(_daddr, _stack, _base) \ -({ \ - struct inet_peer *u; \ - struct inet_peer __rcu **v; \ - \ - stackptr = _stack; \ - *stackptr++ = &_base->root; \ - for (u = rcu_deref_locked(_base->root, _base); \ - u != peer_avl_empty;) { \ - int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \ - if (cmp == 0) \ - break; \ - if (cmp == -1) \ - v = &u->avl_left; \ - else \ - v = &u->avl_right; \ - *stackptr++ = v; \ - u = rcu_deref_locked(*v, _base); \ - } \ - u; \ -}) - -/* - * Called with rcu_read_lock() - * Because we hold no lock against a writer, its quite possible we fall - * in an endless loop. - * But every pointer we follow is guaranteed to be valid thanks to RCU. - * We exit from this function if number of links exceeds PEER_MAXDEPTH - */ -static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, - struct inet_peer_base *base) +/* Called with rcu_read_lock() or base->lock held */ +static struct inet_peer *lookup(const struct inetpeer_addr *daddr, + struct inet_peer_base *base, + unsigned int seq, + struct inet_peer *gc_stack[], + unsigned int *gc_cnt, + struct rb_node **parent_p, + struct rb_node ***pp_p) { - struct inet_peer *u = rcu_dereference(base->root); - int count = 0; + struct rb_node **pp, *parent; + struct inet_peer *p; + + pp = &base->rb_root.rb_node; + parent = NULL; + while (*pp) { + int cmp; - while (u != peer_avl_empty) { - int cmp = inetpeer_addr_cmp(daddr, &u->daddr); + parent = rcu_dereference_raw(*pp); + p = rb_entry(parent, struct inet_peer, rb_node); + cmp = inetpeer_addr_cmp(daddr, &p->daddr); if (cmp == 0) { - /* Before taking a reference, check if this entry was - * deleted (refcnt=0) - */ - if (!refcount_inc_not_zero(&u->refcnt)) { - u = NULL; - } - return u; + if (!refcount_inc_not_zero(&p->refcnt)) + break; + return p; + } + if (gc_stack) { + if (*gc_cnt < PEER_MAX_GC) + gc_stack[(*gc_cnt)++] = p; + } else if (unlikely(read_seqretry(&base->lock, seq))) { + break; } if (cmp == -1) - u = rcu_dereference(u->avl_left); + pp = &(*pp)->rb_left; else - u = rcu_dereference(u->avl_right); - if (unlikely(++count == PEER_MAXDEPTH)) - break; + pp = &(*pp)->rb_right; } + *parent_p = parent; + *pp_p = pp; return NULL; } -/* Called with local BH disabled and the pool lock held. */ -#define lookup_rightempty(start, base) \ -({ \ - struct inet_peer *u; \ - struct inet_peer __rcu **v; \ - *stackptr++ = &start->avl_left; \ - v = &start->avl_left; \ - for (u = rcu_deref_locked(*v, base); \ - u->avl_right != peer_avl_empty_rcu;) { \ - v = &u->avl_right; \ - *stackptr++ = v; \ - u = rcu_deref_locked(*v, base); \ - } \ - u; \ -}) - -/* Called with local BH disabled and the pool lock held. - * Variable names are the proof of operation correctness. - * Look into mm/map_avl.c for more detail description of the ideas. - */ -static void peer_avl_rebalance(struct inet_peer __rcu **stack[], - struct inet_peer __rcu ***stackend, - struct inet_peer_base *base) -{ - struct inet_peer __rcu **nodep; - struct inet_peer *node, *l, *r; - int lh, rh; - - while (stackend > stack) { - nodep = *--stackend; - node = rcu_deref_locked(*nodep, base); - l = rcu_deref_locked(node->avl_left, base); - r = rcu_deref_locked(node->avl_right, base); - lh = node_height(l); - rh = node_height(r); - if (lh > rh + 1) { /* l: RH+2 */ - struct inet_peer *ll, *lr, *lrl, *lrr; - int lrh; - ll = rcu_deref_locked(l->avl_left, base); - lr = rcu_deref_locked(l->avl_right, base); - lrh = node_height(lr); - if (lrh <= node_height(ll)) { /* ll: RH+1 */ - RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ - RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ - node->avl_height = lrh + 1; /* RH+1 or RH+2 */ - RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */ - RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */ - l->avl_height = node->avl_height + 1; - RCU_INIT_POINTER(*nodep, l); - } else { /* ll: RH, lr: RH+1 */ - lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */ - lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */ - RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ - RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ - node->avl_height = rh + 1; /* node: RH+1 */ - RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */ - RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */ - l->avl_height = rh + 1; /* l: RH+1 */ - RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */ - RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */ - lr->avl_height = rh + 2; - RCU_INIT_POINTER(*nodep, lr); - } - } else if (rh > lh + 1) { /* r: LH+2 */ - struct inet_peer *rr, *rl, *rlr, *rll; - int rlh; - rr = rcu_deref_locked(r->avl_right, base); - rl = rcu_deref_locked(r->avl_left, base); - rlh = node_height(rl); - if (rlh <= node_height(rr)) { /* rr: LH+1 */ - RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ - RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ - node->avl_height = rlh + 1; /* LH+1 or LH+2 */ - RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */ - RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */ - r->avl_height = node->avl_height + 1; - RCU_INIT_POINTER(*nodep, r); - } else { /* rr: RH, rl: RH+1 */ - rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */ - rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */ - RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ - RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ - node->avl_height = lh + 1; /* node: LH+1 */ - RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */ - RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */ - r->avl_height = lh + 1; /* r: LH+1 */ - RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */ - RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */ - rl->avl_height = lh + 2; - RCU_INIT_POINTER(*nodep, rl); - } - } else { - node->avl_height = (lh > rh ? lh : rh) + 1; - } - } -} - -/* Called with local BH disabled and the pool lock held. */ -#define link_to_pool(n, base) \ -do { \ - n->avl_height = 1; \ - n->avl_left = peer_avl_empty_rcu; \ - n->avl_right = peer_avl_empty_rcu; \ - /* lockless readers can catch us now */ \ - rcu_assign_pointer(**--stackptr, n); \ - peer_avl_rebalance(stack, stackptr, base); \ -} while (0) - static void inetpeer_free_rcu(struct rcu_head *head) { kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); } -static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, - struct inet_peer __rcu **stack[PEER_MAXDEPTH]) -{ - struct inet_peer __rcu ***stackptr, ***delp; - - if (lookup(&p->daddr, stack, base) != p) - BUG(); - delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty_rcu) { - *delp[0] = p->avl_right; - --stackptr; - } else { - /* look for a node to insert instead of p */ - struct inet_peer *t; - t = lookup_rightempty(p, base); - BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); - **--stackptr = t->avl_left; - /* t is removed, t->daddr > x->daddr for any - * x in p->avl_left subtree. - * Put t in the old place of p. */ - RCU_INIT_POINTER(*delp[0], t); - t->avl_left = p->avl_left; - t->avl_right = p->avl_right; - t->avl_height = p->avl_height; - BUG_ON(delp[1] != &p->avl_left); - delp[1] = &t->avl_left; /* was &p->avl_left */ - } - peer_avl_rebalance(stack, stackptr, base); - base->total--; - call_rcu(&p->rcu, inetpeer_free_rcu); -} - /* perform garbage collect on all items stacked during a lookup */ -static int inet_peer_gc(struct inet_peer_base *base, - struct inet_peer __rcu **stack[PEER_MAXDEPTH], - struct inet_peer __rcu ***stackptr) +static void inet_peer_gc(struct inet_peer_base *base, + struct inet_peer *gc_stack[], + unsigned int gc_cnt) { - struct inet_peer *p, *gchead = NULL; + struct inet_peer *p; __u32 delta, ttl; - int cnt = 0; + int i; if (base->total >= inet_peer_threshold) ttl = 0; /* be aggressive */ @@ -379,43 +154,38 @@ static int inet_peer_gc(struct inet_peer_base *base, ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * base->total / inet_peer_threshold * HZ; - stackptr--; /* last stack slot is peer_avl_empty */ - while (stackptr > stack) { - stackptr--; - p = rcu_deref_locked(**stackptr, base); - if (refcount_read(&p->refcnt) == 1) { - smp_rmb(); - delta = (__u32)jiffies - p->dtime; - if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) { - p->gc_next = gchead; - gchead = p; - } - } + for (i = 0; i < gc_cnt; i++) { + p = gc_stack[i]; + delta = (__u32)jiffies - p->dtime; + if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) + gc_stack[i] = NULL; } - while ((p = gchead) != NULL) { - gchead = p->gc_next; - cnt++; - unlink_from_pool(p, base, stack); + for (i = 0; i < gc_cnt; i++) { + p = gc_stack[i]; + if (p) { + rb_erase(&p->rb_node, &base->rb_root); + base->total--; + call_rcu(&p->rcu, inetpeer_free_rcu); + } } - return cnt; } struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create) { - struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer *p; - unsigned int sequence; - int invalidated, gccnt = 0; + struct inet_peer *p, *gc_stack[PEER_MAX_GC]; + struct rb_node **pp, *parent; + unsigned int gc_cnt, seq; + int invalidated; /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock(); - sequence = read_seqbegin(&base->lock); - p = lookup_rcu(daddr, base); - invalidated = read_seqretry(&base->lock, sequence); + seq = read_seqbegin(&base->lock); + p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); + invalidated = read_seqretry(&base->lock, seq); rcu_read_unlock(); if (p) @@ -428,36 +198,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ + parent = NULL; write_seqlock_bh(&base->lock); -relookup: - p = lookup(daddr, stack, base); - if (p != peer_avl_empty) { - refcount_inc(&p->refcnt); - write_sequnlock_bh(&base->lock); - return p; - } - if (!gccnt) { - gccnt = inet_peer_gc(base, stack, stackptr); - if (gccnt && create) - goto relookup; - } - p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; - if (p) { - p->daddr = *daddr; - refcount_set(&p->refcnt, 2); - atomic_set(&p->rid, 0); - p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; - p->rate_tokens = 0; - /* 60*HZ is arbitrary, but chosen enough high so that the first - * calculation of tokens is at its maximum. - */ - p->rate_last = jiffies - 60*HZ; - INIT_LIST_HEAD(&p->gc_list); - /* Link the node. */ - link_to_pool(p, base); - base->total++; + gc_cnt = 0; + p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp); + if (!p && create) { + p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); + if (p) { + p->daddr = *daddr; + refcount_set(&p->refcnt, 2); + atomic_set(&p->rid, 0); + p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; + p->rate_tokens = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; + + rb_link_node(&p->rb_node, parent, pp); + rb_insert_color(&p->rb_node, &base->rb_root); + base->total++; + } } + if (gc_cnt) + inet_peer_gc(base, gc_stack, gc_cnt); write_sequnlock_bh(&base->lock); return p; @@ -467,8 +232,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { p->dtime = (__u32)jiffies; - smp_mb__before_atomic(); - refcount_dec(&p->refcnt); + + if (refcount_dec_and_test(&p->refcnt)) + call_rcu(&p->rcu, inetpeer_free_rcu); } EXPORT_SYMBOL_GPL(inet_putpeer); @@ -513,30 +279,16 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) } EXPORT_SYMBOL(inet_peer_xrlim_allow); -static void inetpeer_inval_rcu(struct rcu_head *head) -{ - struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu); - - spin_lock_bh(&gc_lock); - list_add_tail(&p->gc_list, &gc_list); - spin_unlock_bh(&gc_lock); - - schedule_delayed_work(&gc_work, gc_delay); -} - void inetpeer_invalidate_tree(struct inet_peer_base *base) { - struct inet_peer *root; - - write_seqlock_bh(&base->lock); + struct inet_peer *p, *n; - root = rcu_deref_locked(base->root, base); - if (root != peer_avl_empty) { - base->root = peer_avl_empty_rcu; - base->total = 0; - call_rcu(&root->gc_rcu, inetpeer_inval_rcu); + rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) { + inet_putpeer(p); + cond_resched(); } - write_sequnlock_bh(&base->lock); + base->rb_root = RB_ROOT; + base->total = 0; } EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 93157f2f4758..525ae88d1e58 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -86,8 +86,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, * NOTE: dopt cannot point to skb. */ -int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, - const struct ip_options *sopt) +int __ip_options_echo(struct net *net, struct ip_options *dopt, + struct sk_buff *skb, const struct ip_options *sopt) { unsigned char *sptr, *dptr; int soffset, doffset; @@ -140,7 +140,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, __be32 addr; memcpy(&addr, dptr+soffset-1, 4); - if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) { + if (inet_addr_type(net, addr) != RTN_UNICAST) { dopt->ts_needtime = 1; soffset += 8; } @@ -174,9 +174,6 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, doffset -= 4; } if (doffset > 3) { - __be32 daddr = fib_compute_spec_dst(skb); - - memcpy(&start[doffset-1], &daddr, 4); dopt->faddr = faddr; dptr[0] = start[0]; dptr[1] = doffset+3; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e153c40c2436..73b0b15245b6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy) return csum; } -static inline int ip_ufo_append_data(struct sock *sk, - struct sk_buff_head *queue, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int maxfraglen, unsigned int flags) -{ - struct sk_buff *skb; - int err; - - /* There is support for UDP fragmentation offload by network - * device, so create one single skb packet containing complete - * udp datagram - */ - skb = skb_peek_tail(queue); - if (!skb) { - skb = sock_alloc_send_skb(sk, - hh_len + fragheaderlen + transhdrlen + 20, - (flags & MSG_DONTWAIT), &err); - - if (!skb) - return err; - - /* reserve space for Hardware header */ - skb_reserve(skb, hh_len); - - /* create space for UDP/IP header */ - skb_put(skb, fragheaderlen + transhdrlen); - - /* initialize network header pointer */ - skb_reset_network_header(skb); - - /* initialize protocol header pointer */ - skb->transport_header = skb->network_header + fragheaderlen; - - skb->csum = 0; - - if (flags & MSG_CONFIRM) - skb_set_dst_pending_confirm(skb, 1); - - __skb_queue_tail(queue, skb); - } else if (skb_is_gso(skb)) { - goto append; - } - - skb->ip_summed = CHECKSUM_PARTIAL; - /* specify the length of each IP datagram fragment */ - skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - -append: - return skb_append_datato_frags(sk, skb, getfrag, from, - (length - transhdrlen)); -} - static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, @@ -965,19 +910,6 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((skb && skb_is_gso(skb)) || - (((length + (skb ? skb->len : fragheaderlen)) > mtu) && - (skb_queue_len(queue) <= 1) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { - err = ip_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, transhdrlen, - maxfraglen, flags); - if (err) - goto error; - return 0; - } /* So, what's going on in the loop below? * @@ -1288,16 +1220,6 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (!skb) return -EINVAL; - if ((size + skb->len > mtu) && - (skb_queue_len(&sk->sk_write_queue) == 1) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { - if (skb->ip_summed != CHECKSUM_PARTIAL) - return -EOPNOTSUPP; - - skb_shinfo(skb)->gso_size = mtu - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - } cork->length += size; while (size > 0) { @@ -1603,7 +1525,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, int err; int oif; - if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) + if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt)) return; ipc.addr = daddr; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ecc4b4a2413e..dd68a9ed5e40 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -80,7 +80,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) } -static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) +static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg, + struct sk_buff *skb) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; @@ -88,7 +89,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) if (IPCB(skb)->opt.optlen == 0) return; - if (ip_options_echo(opt, skb)) { + if (ip_options_echo(net, opt, skb)) { msg->msg_flags |= MSG_CTRUNC; return; } @@ -204,7 +205,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, } if (flags & IP_CMSG_RETOPTS) { - ip_cmsg_recv_retopts(msg, skb); + ip_cmsg_recv_retopts(sock_net(sk), msg, skb); flags &= ~IP_CMSG_RETOPTS; if (!flags) @@ -1227,14 +1228,7 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - /* We need to keep the dst for __ip_options_echo() - * We could restrict the test to opt.ts_needtime || opt.srr, - * but the following is good enough as IP options are not often used. - */ - if (unlikely(IPCB(skb)->opt.optlen)) - skb_dst_force(skb); - else - skb_dst_drop(skb); + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 0192c255e508..5ed63d250950 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; -static bool is_vti_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti_netdev_ops; -} - -static int vti_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip_tunnel *tunnel = netdev_priv(dev); - - if (!is_vti_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(tunnel->net, dev_net(dev))) - xfrm_garbage_collect(tunnel->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti_notifier_block __read_mostly = { - .notifier_call = vti_device_event, -}; - static int __init vti_init(void) { const char *msg; @@ -618,8 +591,6 @@ static int __init vti_init(void) pr_info("IPv4 over IPsec tunneling driver\n"); - register_netdevice_notifier(&vti_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) @@ -652,7 +623,6 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti_notifier_block); pr_err("vti init: failed to register %s\n", msg); return err; } @@ -664,7 +634,6 @@ static void __exit vti_fini(void) xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); - unregister_netdevice_notifier(&vti_notifier_block); } module_init(vti_init); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 06863ea3fc5b..c9b3e6e069ae 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3114,14 +3114,14 @@ int __init ip_mr_init(void) } #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, - ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL); + ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, - ipmr_rtm_route, NULL, NULL); + ipmr_rtm_route, NULL, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, - ipmr_rtm_route, NULL, NULL); + ipmr_rtm_route, NULL, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, - NULL, ipmr_rtm_dumplink, NULL); + NULL, ipmr_rtm_dumplink, 0); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 43eb6567b3a0..b6d3fe03feb3 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -206,14 +206,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST), SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS), SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS), - SNMP_MIB_ITEM("TCPPrequeued", LINUX_MIB_TCPPREQUEUED), - SNMP_MIB_ITEM("TCPDirectCopyFromBacklog", LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG), - SNMP_MIB_ITEM("TCPDirectCopyFromPrequeue", LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE), - SNMP_MIB_ITEM("TCPPrequeueDropped", LINUX_MIB_TCPPREQUEUEDROPPED), - SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS), - SNMP_MIB_ITEM("TCPHPHitsToUser", LINUX_MIB_TCPHPHITSTOUSER), SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS), - SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS), SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), @@ -230,14 +223,12 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES), SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS), - SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), - SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED), SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT), SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT), diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index b0bb5d0a30bd..33b70bfd1122 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -122,7 +122,8 @@ void raw_unhash_sk(struct sock *sk) EXPORT_SYMBOL_GPL(raw_unhash_sk); struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, - unsigned short num, __be32 raddr, __be32 laddr, int dif) + unsigned short num, __be32 raddr, __be32 laddr, + int dif, int sdif) { sk_for_each_from(sk) { struct inet_sock *inet = inet_sk(sk); @@ -130,7 +131,8 @@ struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, if (net_eq(sock_net(sk), net) && inet->inet_num == num && !(inet->inet_daddr && inet->inet_daddr != raddr) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && - !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif)) goto found; /* gotcha */ } sk = NULL; @@ -171,6 +173,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) */ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { + int sdif = inet_sdif(skb); struct sock *sk; struct hlist_head *head; int delivered = 0; @@ -184,13 +187,13 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) net = dev_net(skb->dev); sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex); + skb->dev->ifindex, sdif); while (sk) { delivered = 1; if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && ip_mc_sf_allow(sk, iph->daddr, iph->saddr, - skb->dev->ifindex)) { + skb->dev->ifindex, sdif)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ @@ -199,7 +202,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) } sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex); + skb->dev->ifindex, sdif); } out: read_unlock(&raw_v4_hashinfo.lock); @@ -297,12 +300,15 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) read_lock(&raw_v4_hashinfo.lock); raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); if (raw_sk) { + int dif = skb->dev->ifindex; + int sdif = inet_sdif(skb); + iph = (const struct iphdr *)skb->data; net = dev_net(skb->dev); while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr, iph->saddr, - skb->dev->ifindex)) != NULL) { + dif, sdif)) != NULL) { raw_err(raw_sk, skb, info); raw_sk = sk_next(raw_sk); iph = (const struct iphdr *)skb->data; diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index e1a51ca68d23..c200065ef9a5 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -46,13 +46,13 @@ static struct sock *raw_lookup(struct net *net, struct sock *from, sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol, r->id.idiag_dst[0], r->id.idiag_src[0], - r->id.idiag_if); + r->id.idiag_if, 0); #if IS_ENABLED(CONFIG_IPV6) else sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol, (const struct in6_addr *)r->id.idiag_src, (const struct in6_addr *)r->id.idiag_dst, - r->id.idiag_if); + r->id.idiag_if, 0); #endif return sk; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0383e66f59bc..2ef46294475f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3067,7 +3067,7 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, 0); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 03ad8778c395..b1bb1b3a1082 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - ireq->opt = tcp_v4_save_options(skb); + ireq->opt = tcp_v4_save_options(sock_net(sk), skb); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9bf809726066..0d3c038d7b04 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,6 +45,9 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; +/* obsolete */ +static int sysctl_tcp_low_latency __read_mostly; + /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 71ce33decd97..71b25567e787 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -388,6 +388,19 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) return period; } +static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp) +{ + u32 rate = READ_ONCE(tp->rate_delivered); + u32 intv = READ_ONCE(tp->rate_interval_us); + u64 rate64 = 0; + + if (rate && intv) { + rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; + do_div(rate64, intv); + } + return rate64; +} + /* Address-family independent initialization for a tcp_sock. * * NOTE: A lot of things set to zero explicitly by call to @@ -400,7 +413,6 @@ void tcp_init_sock(struct sock *sk) tp->out_of_order_queue = RB_ROOT; tcp_init_xmit_timers(sk); - tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); icsk->icsk_rto = TCP_TIMEOUT_INIT; @@ -1034,23 +1046,29 @@ out_err: } EXPORT_SYMBOL_GPL(do_tcp_sendpages); -int tcp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { - ssize_t res; - if (!(sk->sk_route_caps & NETIF_F_SG) || !sk_check_csum_caps(sk)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); - lock_sock(sk); - tcp_rate_check_app_limited(sk); /* is sending application-limited? */ - res = do_tcp_sendpages(sk, page, offset, size, flags); + return do_tcp_sendpages(sk, page, offset, size, flags); +} + +int tcp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) +{ + int ret; + + lock_sock(sk); + ret = tcp_sendpage_locked(sk, page, offset, size, flags); release_sock(sk); - return res; + + return ret; } EXPORT_SYMBOL(tcp_sendpage); @@ -1144,9 +1162,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return err; } -int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); + struct ubuf_info *uarg = NULL; struct sk_buff *skb; struct sockcm_cookie sockc; int flags, err, copied = 0; @@ -1155,9 +1174,27 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) bool sg; long timeo; - lock_sock(sk); - flags = msg->msg_flags; + + if (flags & MSG_ZEROCOPY && size) { + if (sk->sk_state != TCP_ESTABLISHED) { + err = -EINVAL; + goto out_err; + } + + skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL; + uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb)); + if (!uarg) { + err = -ENOBUFS; + goto out_err; + } + + /* skb may be freed in main loop, keep extra ref on uarg */ + sock_zerocopy_get(uarg); + if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG)) + uarg->zerocopy = 0; + } + if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) { err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) @@ -1281,7 +1318,7 @@ new_segment: err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; - } else { + } else if (!uarg || !uarg->zerocopy) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1319,6 +1356,13 @@ new_segment: page_ref_inc(pfrag->page); } pfrag->offset += copy; + } else { + err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); + if (err == -EMSGSIZE || err == -EEXIST) + goto new_segment; + if (err < 0) + goto do_error; + copy = err; } if (!copied) @@ -1365,7 +1409,7 @@ out: tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } out_nopush: - release_sock(sk); + sock_zerocopy_put(uarg); return copied + copied_syn; do_fault: @@ -1382,6 +1426,7 @@ do_error: if (copied + copied_syn) goto out; out_err: + sock_zerocopy_put_abort(uarg); err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && @@ -1389,9 +1434,19 @@ out_err: sk->sk_write_space(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } - release_sock(sk); return err; } + +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + int ret; + + lock_sock(sk); + ret = tcp_sendmsg_locked(sk, msg, size); + release_sock(sk); + + return ret; +} EXPORT_SYMBOL(tcp_sendmsg); /* @@ -1525,20 +1580,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) tcp_send_ack(sk); } -static void tcp_prequeue_process(struct sock *sk) -{ - struct sk_buff *skb; - struct tcp_sock *tp = tcp_sk(sk); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED); - - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb); - - /* Clear memory counter. */ - tp->ucopy.memory = 0; -} - static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1671,7 +1712,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int err; int target; /* Read at least this many bytes */ long timeo; - struct task_struct *user_recv = NULL; struct sk_buff *skb, *last; u32 urg_hole = 0; @@ -1806,51 +1846,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, tcp_cleanup_rbuf(sk, copied); - if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) { - /* Install new reader */ - if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { - user_recv = current; - tp->ucopy.task = user_recv; - tp->ucopy.msg = msg; - } - - tp->ucopy.len = len; - - WARN_ON(tp->copied_seq != tp->rcv_nxt && - !(flags & (MSG_PEEK | MSG_TRUNC))); - - /* Ugly... If prequeue is not empty, we have to - * process it before releasing socket, otherwise - * order will be broken at second iteration. - * More elegant solution is required!!! - * - * Look: we have the following (pseudo)queues: - * - * 1. packets in flight - * 2. backlog - * 3. prequeue - * 4. receive_queue - * - * Each queue can be processed only if the next ones - * are empty. At this point we have empty receive_queue. - * But prequeue _can_ be not empty after 2nd iteration, - * when we jumped to start of loop because backlog - * processing added something to receive_queue. - * We cannot release_sock(), because backlog contains - * packets arrived _after_ prequeued ones. - * - * Shortly, algorithm is clear --- to process all - * the queues in order. We could make it more directly, - * requeueing packets from backlog to prequeue, if - * is not empty. It is more elegant, but eats cycles, - * unfortunately. - */ - if (!skb_queue_empty(&tp->ucopy.prequeue)) - goto do_prequeue; - - /* __ Set realtime policy in scheduler __ */ - } - if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); @@ -1859,31 +1854,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, sk_wait_data(sk, &timeo, last); } - if (user_recv) { - int chunk; - - /* __ Restore normal policy in scheduler __ */ - - chunk = len - tp->ucopy.len; - if (chunk != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); - len -= chunk; - copied += chunk; - } - - if (tp->rcv_nxt == tp->copied_seq && - !skb_queue_empty(&tp->ucopy.prequeue)) { -do_prequeue: - tcp_prequeue_process(sk); - - chunk = len - tp->ucopy.len; - if (chunk != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); - len -= chunk; - copied += chunk; - } - } - } if ((flags & MSG_PEEK) && (peek_seq - copied - urg_hole != tp->copied_seq)) { net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", @@ -1934,10 +1904,8 @@ do_prequeue: tcp_rcv_space_adjust(sk); skip_copy: - if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { + if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) tp->urg_data = 0; - tcp_fast_path_check(sk); - } if (used + offset < skb->len) continue; @@ -1955,25 +1923,6 @@ skip_copy: break; } while (len > 0); - if (user_recv) { - if (!skb_queue_empty(&tp->ucopy.prequeue)) { - int chunk; - - tp->ucopy.len = copied > 0 ? len : 0; - - tcp_prequeue_process(sk); - - if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); - len -= chunk; - copied += chunk; - } - } - - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - } - /* According to UNIX98, msg_name/msg_namelen are ignored * on connected socket. I was just happy when found this 8) --ANK */ @@ -2823,7 +2772,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now, intv; + u32 now; u64 rate64; bool slow; u32 rate; @@ -2922,13 +2871,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_data_segs_out = tp->data_segs_out; info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; - rate = READ_ONCE(tp->rate_delivered); - intv = READ_ONCE(tp->rate_interval_us); - if (rate && intv) { - rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; - do_div(rate64, intv); + rate64 = tcp_compute_delivery_rate(tp); + if (rate64) info->tcpi_delivery_rate = rate64; - } unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -2938,8 +2883,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *stats; struct tcp_info info; + u64 rate64; + u32 rate; - stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); + stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + + 3 * nla_total_size(sizeof(u32)) + + 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC); if (!stats) return NULL; @@ -2954,6 +2903,20 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) tp->data_segs_out, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, tp->total_retrans, TCP_NLA_PAD); + + rate = READ_ONCE(sk->sk_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD); + + rate64 = tcp_compute_delivery_rate(tp); + nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); + + nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd); + nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); + nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); + + nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); + nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); return stats; } diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 609965f0e298..fc3614377413 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -49,7 +49,6 @@ MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wma struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ u32 last_max_cwnd; /* last maximum snd_cwnd */ - u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 epoch_start; /* beginning of an epoch */ @@ -72,7 +71,6 @@ static void bictcp_init(struct sock *sk) struct bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); - ca->loss_cwnd = 0; if (initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; @@ -172,22 +170,12 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) else ca->last_max_cwnd = tp->snd_cwnd; - ca->loss_cwnd = tp->snd_cwnd; - if (tp->snd_cwnd <= low_window) return max(tp->snd_cwnd >> 1U, 2U); else return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct bictcp *ca = inet_csk_ca(sk); - - return max(tp->snd_cwnd, ca->loss_cwnd); -} - static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) @@ -214,7 +202,7 @@ static struct tcp_congestion_ops bictcp __read_mostly = { .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, .set_state = bictcp_state, - .undo_cwnd = bictcp_undo_cwnd, + .undo_cwnd = tcp_reno_undo_cwnd, .pkts_acked = bictcp_acked, .owner = THIS_MODULE, .name = "bic", diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 50a0f3e51d5b..66ac69f7bd19 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -85,7 +85,6 @@ struct cdg { u8 state; u8 delack; u32 rtt_seq; - u32 undo_cwnd; u32 shadow_wnd; u16 backoff_cnt; u16 sample_cnt; @@ -330,8 +329,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk) struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - ca->undo_cwnd = tp->snd_cwnd; - if (ca->state == CDG_BACKOFF) return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10); @@ -344,13 +341,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk) return max(2U, tp->snd_cwnd >> 1); } -static u32 tcp_cdg_undo_cwnd(struct sock *sk) -{ - struct cdg *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd); -} - static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) { struct cdg *ca = inet_csk_ca(sk); @@ -403,7 +393,7 @@ struct tcp_congestion_ops tcp_cdg __read_mostly = { .cong_avoid = tcp_cdg_cong_avoid, .cwnd_event = tcp_cdg_cwnd_event, .pkts_acked = tcp_cdg_acked, - .undo_cwnd = tcp_cdg_undo_cwnd, + .undo_cwnd = tcp_reno_undo_cwnd, .ssthresh = tcp_cdg_ssthresh, .release = tcp_cdg_release, .init = tcp_cdg_init, diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index fde983f6376b..c2b174469645 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -456,7 +456,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max(tp->snd_cwnd, tp->snd_ssthresh << 1); + return max(tp->snd_cwnd, tp->prior_cwnd); } EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 57ae5b5ae643..78bfadfcf342 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -83,7 +83,6 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ u32 last_max_cwnd; /* last maximum snd_cwnd */ - u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ @@ -142,7 +141,6 @@ static void bictcp_init(struct sock *sk) struct bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); - ca->loss_cwnd = 0; if (hystart) bictcp_hystart_reset(sk); @@ -366,18 +364,9 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) else ca->last_max_cwnd = tp->snd_cwnd; - ca->loss_cwnd = tp->snd_cwnd; - return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct sock *sk) -{ - struct bictcp *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) { @@ -470,7 +459,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, .set_state = bictcp_state, - .undo_cwnd = bictcp_undo_cwnd, + .undo_cwnd = tcp_reno_undo_cwnd, .cwnd_event = bictcp_cwnd_event, .pkts_acked = bictcp_acked, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 6d9879e93648..d1c33c91eadc 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -94,7 +94,6 @@ static const struct hstcp_aimd_val { struct hstcp { u32 ai; - u32 loss_cwnd; }; static void hstcp_init(struct sock *sk) @@ -153,22 +152,14 @@ static u32 hstcp_ssthresh(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); - ca->loss_cwnd = tp->snd_cwnd; /* Do multiplicative decrease */ return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); } -static u32 hstcp_cwnd_undo(struct sock *sk) -{ - const struct hstcp *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - static struct tcp_congestion_ops tcp_highspeed __read_mostly = { .init = hstcp_init, .ssthresh = hstcp_ssthresh, - .undo_cwnd = hstcp_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = hstcp_cong_avoid, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 3eb78cde6ff0..082d479462fa 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -66,7 +66,6 @@ static inline void htcp_reset(struct htcp *ca) static u32 htcp_cwnd_undo(struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); if (ca->undo_last_cong) { @@ -76,7 +75,7 @@ static u32 htcp_cwnd_undo(struct sock *sk) ca->undo_last_cong = 0; } - return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta); + return tcp_reno_undo_cwnd(sk); } static inline void measure_rtt(struct sock *sk, u32 srtt) diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 60352ff4f5a8..7c843578f233 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -48,7 +48,6 @@ struct illinois { u32 end_seq; /* right edge of current RTT */ u32 alpha; /* Additive increase */ u32 beta; /* Muliplicative decrease */ - u32 loss_cwnd; /* cwnd on loss */ u16 acked; /* # packets acked by current ACK */ u8 rtt_above; /* average rtt has gone above threshold */ u8 rtt_low; /* # of rtts measurements below threshold */ @@ -297,18 +296,10 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); - ca->loss_cwnd = tp->snd_cwnd; /* Multiplicative decrease */ return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); } -static u32 tcp_illinois_cwnd_undo(struct sock *sk) -{ - const struct illinois *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - /* Extract info for Tcp socket info provided via netlink. */ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) @@ -336,7 +327,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, static struct tcp_congestion_ops tcp_illinois __read_mostly = { .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, - .undo_cwnd = tcp_illinois_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_illinois_cong_avoid, .set_state = tcp_illinois_state, .get_info = tcp_illinois_info, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53de1424c13c..d73903fe8c83 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -103,7 +103,6 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ -#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ @@ -1952,6 +1951,7 @@ void tcp_enter_loss(struct sock *sk) !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->prior_cwnd = tp->snd_cwnd; tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); @@ -3372,12 +3372,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 if (tp->snd_wnd != nwin) { tp->snd_wnd = nwin; - /* Note, it is the only place, where - * fast path is recovered for sending TCP. - */ - tp->pred_flags = 0; - tcp_fast_path_check(sk); - if (tcp_send_head(sk)) tcp_slow_start_after_idle_check(sk); @@ -3559,6 +3553,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + u32 ack_ev_flags = 0; sack_state.first_sackt = 0; sack_state.rate = &rs; @@ -3599,42 +3594,26 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (flag & FLAG_UPDATE_TS_RECENT) tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { - /* Window is constant, pure forward advance. - * No more checks are required. - * Note, we use the fact that SND.UNA>=SND.WL2. - */ - tcp_update_wl(tp, ack_seq); - tcp_snd_una_update(tp, ack); - flag |= FLAG_WIN_UPDATE; - - tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); - } else { - u32 ack_ev_flags = CA_ACK_SLOWPATH; - - if (ack_seq != TCP_SKB_CB(skb)->end_seq) - flag |= FLAG_DATA; - else - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); + if (ack_seq != TCP_SKB_CB(skb)->end_seq) + flag |= FLAG_DATA; + else + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); - flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); + flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); - if (TCP_SKB_CB(skb)->sacked) - flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_state); + if (TCP_SKB_CB(skb)->sacked) + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, + &sack_state); - if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { - flag |= FLAG_ECE; - ack_ev_flags |= CA_ACK_ECE; - } + if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { + flag |= FLAG_ECE; + ack_ev_flags = CA_ACK_ECE; + } - if (flag & FLAG_WIN_UPDATE) - ack_ev_flags |= CA_ACK_WIN_UPDATE; + if (flag & FLAG_WIN_UPDATE) + ack_ev_flags |= CA_ACK_WIN_UPDATE; - tcp_in_ack_event(sk, ack_ev_flags); - } + tcp_in_ack_event(sk, ack_ev_flags); /* We passed data and got it acked, remove any soft error * log. Something worked... @@ -4402,8 +4381,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) return; } - /* Disable header prediction. */ - tp->pred_flags = 0; inet_csk_schedule_ack(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); @@ -4592,8 +4569,8 @@ err: static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - bool fragstolen = false; - int eaten = -1; + bool fragstolen; + int eaten; if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { __kfree_skb(skb); @@ -4615,32 +4592,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) goto out_of_window; /* Ok. In sequence. In window. */ - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && - sock_owned_by_user(sk) && !tp->urg_data) { - int chunk = min_t(unsigned int, skb->len, - tp->ucopy.len); - - __set_current_state(TASK_RUNNING); - - if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - eaten = (chunk == skb->len); - tcp_rcv_space_adjust(sk); - } - } - - if (eaten <= 0) { queue_and_out: - if (eaten < 0) { - if (skb_queue_len(&sk->sk_receive_queue) == 0) - sk_forced_mem_schedule(sk, skb->truesize); - else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) - goto drop; - } - eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); - } + if (skb_queue_len(&sk->sk_receive_queue) == 0) + sk_forced_mem_schedule(sk, skb->truesize); + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + goto drop; + + eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); @@ -4660,8 +4618,6 @@ queue_and_out: if (tp->rx_opt.num_sacks) tcp_sack_remove(tp); - tcp_fast_path_check(sk); - if (eaten > 0) kfree_skb_partial(skb, fragstolen); if (!sock_flag(sk, SOCK_DEAD)) @@ -4987,7 +4943,6 @@ static int tcp_prune_queue(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED); /* Massive buffer overcommit. */ - tp->pred_flags = 0; return -1; } @@ -5159,9 +5114,6 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) tp->urg_data = TCP_URG_NOTYET; tp->urg_seq = ptr; - - /* Disable header prediction. */ - tp->pred_flags = 0; } /* This is the 'fast' part of urgent handling. */ @@ -5190,26 +5142,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t } } -static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) -{ - struct tcp_sock *tp = tcp_sk(sk); - int chunk = skb->len - hlen; - int err; - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); - else - err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg); - - if (!err) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - tcp_rcv_space_adjust(sk); - } - - return err; -} - /* Accept RST for rcv_nxt - 1 after a FIN. * When tcp connections are abruptly terminated from Mac OSX (via ^C), a * FIN is sent followed by a RST packet. The RST is sent with the same @@ -5340,201 +5272,29 @@ discard: /* * TCP receive function for the ESTABLISHED state. - * - * It is split into a fast path and a slow path. The fast path is - * disabled when: - * - A zero window was announced from us - zero window probing - * is only handled properly in the slow path. - * - Out of order segments arrived. - * - Urgent data is expected. - * - There is no buffer space left - * - Unexpected TCP flags/window values/header lengths are received - * (detected by checking the TCP header against pred_flags) - * - Data is sent in both directions. Fast path only supports pure senders - * or pure receivers (this means either the sequence number or the ack - * value must stay constant) - * - Unexpected TCP option. - * - * When these conditions are not satisfied it drops into a standard - * receive procedure patterned after RFC793 to handle all cases. - * The first three cases are guaranteed by proper pred_flags setting, - * the rest is checked inline. Fast processing is turned on in - * tcp_data_queue when everything is OK. */ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; struct tcp_sock *tp = tcp_sk(sk); tcp_mstamp_refresh(tp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); - /* - * Header prediction. - * The code loosely follows the one in the famous - * "30 instruction TCP receive" Van Jacobson mail. - * - * Van's trick is to deposit buffers into socket queue - * on a device interrupt, to call tcp_recv function - * on the receive process context and checksum and copy - * the buffer to user space. smart... - * - * Our current scheme is not silly either but we take the - * extra cost of the net_bh soft interrupt processing... - * We do checksum and copy also but from device to kernel. - */ tp->rx_opt.saw_tstamp = 0; - /* pred_flags is 0xS?10 << 16 + snd_wnd - * if header_prediction is to be made - * 'S' will always be tp->tcp_header_len >> 2 - * '?' will be 0 for the fast path, otherwise pred_flags is 0 to - * turn it off (when there are holes in the receive - * space for instance) - * PSH flag is ignored. - */ - - if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && - TCP_SKB_CB(skb)->seq == tp->rcv_nxt && - !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { - int tcp_header_len = tp->tcp_header_len; - - /* Timestamp header prediction: tcp_header_len - * is automatically equal to th->doff*4 due to pred_flags - * match. - */ - - /* Check timestamp */ - if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - /* No? Slow path! */ - if (!tcp_parse_aligned_timestamp(tp, th)) - goto slow_path; - - /* If PAWS failed, check it more carefully in slow path */ - if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) - goto slow_path; - - /* DO NOT update ts_recent here, if checksum fails - * and timestamp was corrupted part, it will result - * in a hung connection since we will drop all - * future packets due to the PAWS test. - */ - } - - if (len <= tcp_header_len) { - /* Bulk data transfer: sender */ - if (len == tcp_header_len) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - /* We know that such packets are checksummed - * on entry. - */ - tcp_ack(sk, skb, 0); - __kfree_skb(skb); - tcp_data_snd_check(sk); - return; - } else { /* Header too small */ - TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); - goto discard; - } - } else { - int eaten = 0; - bool fragstolen = false; - - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len && - sock_owned_by_user(sk)) { - __set_current_state(TASK_RUNNING); - - if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + - TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - __skb_pull(skb, tcp_header_len); - tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHPHITSTOUSER); - eaten = 1; - } - } - if (!eaten) { - if (tcp_checksum_complete(skb)) - goto csum_error; - - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; - - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); - - /* Bulk data transfer: receiver */ - eaten = tcp_queue_rcv(sk, skb, tcp_header_len, - &fragstolen); - } - - tcp_event_data_recv(sk, skb); - - if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { - /* Well, only one small jumplet in fast path... */ - tcp_ack(sk, skb, FLAG_DATA); - tcp_data_snd_check(sk); - if (!inet_csk_ack_scheduled(sk)) - goto no_ack; - } - - __tcp_ack_snd_check(sk, 0); -no_ack: - if (eaten) - kfree_skb_partial(skb, fragstolen); - sk->sk_data_ready(sk); - return; - } - } - -slow_path: if (len < (th->doff << 2) || tcp_checksum_complete(skb)) goto csum_error; if (!th->ack && !th->rst && !th->syn) goto discard; - /* - * Standard slow path. - */ - if (!tcp_validate_incoming(sk, skb, th, 1)) return; -step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) + if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0) goto discard; tcp_rcv_rtt_measure_ts(sk, skb); @@ -5587,12 +5347,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) if (sock_flag(sk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); - - if (!tp->rx_opt.snd_wscale) - __tcp_fast_path_on(tp, tp->snd_wnd); - else - tp->pred_flags = 0; - } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@ -5721,7 +5475,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_ecn_rcv_synack(tp, th); tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_ack(sk, skb, FLAG_SLOWPATH); + tcp_ack(sk, skb, 0); /* Ok.. it's good. Set up sequence numbers and * move to established. @@ -5957,8 +5711,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; /* step 5: check the ACK field */ - acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT | + + acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT | FLAG_NO_CHALLENGE_ACK) > 0; if (!acceptable) { @@ -6026,7 +5780,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tp->lsndtime = tcp_jiffies32; tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); break; case TCP_FIN_WAIT1: { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a20e7f03d5f7..c8784ab37852 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -85,8 +85,6 @@ #include <crypto/hash.h> #include <linux/scatterlist.h> -int sysctl_tcp_low_latency __read_mostly; - #ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th); @@ -385,7 +383,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, th->dest, iph->saddr, ntohs(th->source), - inet_iif(icmp_skb)); + inet_iif(icmp_skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; @@ -661,7 +659,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0, ip_hdr(skb)->saddr, th->source, ip_hdr(skb)->daddr, - ntohs(th->source), inet_iif(skb)); + ntohs(th->source), inet_iif(skb), + tcp_v4_sdif(skb)); /* don't send rst if it can't find key */ if (!sk1) goto out; @@ -1269,7 +1268,7 @@ static void tcp_v4_init_req(struct request_sock *req, sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->opt = tcp_v4_save_options(skb); + ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb); } static struct dst_entry *tcp_v4_route_req(const struct sock *sk, @@ -1458,7 +1457,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_rx_dst = NULL; } } - tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); + tcp_rcv_established(sk, skb, tcp_hdr(skb)); return 0; } @@ -1525,7 +1524,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), - skb->skb_iif); + skb->skb_iif, inet_sdif(skb)); if (sk) { skb->sk = sk; skb->destructor = sock_edemux; @@ -1541,61 +1540,6 @@ void tcp_v4_early_demux(struct sk_buff *skb) } } -/* Packet is added to VJ-style prequeue for processing in process - * context, if a reader task is waiting. Apparently, this exciting - * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) - * failed somewhere. Latency? Burstiness? Well, at least now we will - * see, why it failed. 8)8) --ANK - * - */ -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (sysctl_tcp_low_latency || !tp->ucopy.task) - return false; - - if (skb->len <= tcp_hdrlen(skb) && - skb_queue_len(&tp->ucopy.prequeue) == 0) - return false; - - /* Before escaping RCU protected region, we need to take care of skb - * dst. Prequeue is only enabled for established sockets. - * For such sockets, we might need the skb dst only to set sk->sk_rx_dst - * Instead of doing full sk_rx_dst validity here, let's perform - * an optimistic check. - */ - if (likely(sk->sk_rx_dst)) - skb_dst_drop(skb); - else - skb_dst_force_safe(skb); - - __skb_queue_tail(&tp->ucopy.prequeue, skb); - tp->ucopy.memory += skb->truesize; - if (skb_queue_len(&tp->ucopy.prequeue) >= 32 || - tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { - struct sk_buff *skb1; - - BUG_ON(sock_owned_by_user(sk)); - __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED, - skb_queue_len(&tp->ucopy.prequeue)); - - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb1); - - tp->ucopy.memory = 0; - } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_sync_poll(sk_sleep(sk), - POLLIN | POLLRDNORM | POLLRDBAND); - if (!inet_csk_ack_scheduled(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * tcp_rto_min(sk)) / 4, - TCP_RTO_MAX); - } - return true; -} -EXPORT_SYMBOL(tcp_prequeue); - bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; @@ -1645,6 +1589,7 @@ EXPORT_SYMBOL(tcp_filter); int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + int sdif = inet_sdif(skb); const struct iphdr *iph; const struct tcphdr *th; bool refcounted; @@ -1695,7 +1640,7 @@ int tcp_v4_rcv(struct sk_buff *skb) lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, - th->dest, &refcounted); + th->dest, sdif, &refcounted); if (!sk) goto no_tcp_socket; @@ -1770,8 +1715,7 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v4_do_rcv(sk, skb); + ret = tcp_v4_do_rcv(sk, skb); } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } @@ -1824,7 +1768,8 @@ do_time_wait: __tcp_hdrlen(th), iph->saddr, th->source, iph->daddr, th->dest, - inet_iif(skb)); + inet_iif(skb), + sdif); if (sk2) { inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; @@ -1936,9 +1881,6 @@ void tcp_v4_destroy_sock(struct sock *sk) } #endif - /* Clean prequeue, it must be empty really */ - __skb_queue_purge(&tp->ucopy.prequeue); - /* Clean up a referenced TCP bind bucket. */ if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ff83c1637d8..1537b87c657f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -436,8 +436,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, struct tcp_sock *newtp = tcp_sk(newsk); /* Now setup tcp_sock */ - newtp->pred_flags = 0; - newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; newtp->segs_in = 1; @@ -445,7 +443,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; - tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); tcp_init_wl(newtp, treq->rcv_isn); diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 6d650ed3cb59..1ff73982e28c 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -86,7 +86,6 @@ struct tcpnv { * < 0 => less than 1 packet/RTT */ u8 available8; u16 available16; - u32 loss_cwnd; /* cwnd at last loss */ u8 nv_allow_cwnd_growth:1, /* whether cwnd can grow */ nv_reset:1, /* whether to reset values */ nv_catchup:1; /* whether we are growing because @@ -121,7 +120,6 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); ca->nv_reset = 0; - ca->loss_cwnd = 0; ca->nv_no_cong_cnt = 0; ca->nv_rtt_cnt = 0; ca->nv_last_rtt = 0; @@ -177,19 +175,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) static u32 tcpnv_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - struct tcpnv *ca = inet_csk_ca(sk); - ca->loss_cwnd = tp->snd_cwnd; return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U); } -static u32 tcpnv_undo_cwnd(struct sock *sk) -{ - struct tcpnv *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - static void tcpnv_state(struct sock *sk, u8 new_state) { struct tcpnv *ca = inet_csk_ca(sk); @@ -446,7 +435,7 @@ static struct tcp_congestion_ops tcpnv __read_mostly = { .ssthresh = tcpnv_recalc_ssthresh, .cong_avoid = tcpnv_cong_avoid, .set_state = tcpnv_state, - .undo_cwnd = tcpnv_undo_cwnd, + .undo_cwnd = tcp_reno_undo_cwnd, .pkts_acked = tcpnv_acked, .get_info = tcpnv_get_info, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b7661a68d498..3e0d19631534 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -295,9 +295,7 @@ static u16 tcp_select_window(struct sock *sk) /* RFC1323 scaling applied */ new_win >>= tp->rx_opt.rcv_wscale; - /* If we advertise zero window, disable fast path. */ if (new_win == 0) { - tp->pred_flags = 0; if (old_win) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTOZEROWINDOWADV); @@ -2377,7 +2375,6 @@ bool tcp_schedule_loss_probe(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); u32 timeout, rto_delta_us; /* Don't do any loss probe on a Fast Open connection before 3WHS @@ -2398,15 +2395,19 @@ bool tcp_schedule_loss_probe(struct sock *sk) tcp_send_head(sk)) return false; - /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + /* Probe timeout is 2*rtt. Add minimum RTO to account * for delayed ack when there's one outstanding packet. If no RTT * sample is available then probe after TCP_TIMEOUT_INIT. */ - timeout = rtt << 1 ? : TCP_TIMEOUT_INIT; - if (tp->packets_out == 1) - timeout = max_t(u32, timeout, - (rtt + (rtt >> 1) + TCP_DELACK_MAX)); - timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + if (tp->srtt_us) { + timeout = usecs_to_jiffies(tp->srtt_us >> 2); + if (tp->packets_out == 1) + timeout += TCP_RTO_MIN; + else + timeout += TCP_TIMEOUT_MIN; + } else { + timeout = TCP_TIMEOUT_INIT; + } /* If the RTO formula yields an earlier time, then use that time. */ rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */ diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..697f4c67b2e3 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -105,8 +105,9 @@ static inline int tcp_probe_avail(void) * Note: arguments must match tcp_rcv_established()! */ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); @@ -145,7 +146,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, BUG(); } - p->length = skb->len; + p->length = len; p->snd_nxt = tp->snd_nxt; p->snd_una = tp->snd_una; p->snd_cwnd = tp->snd_cwnd; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index fe9a493d0208..449cd914d58e 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk) tp->rack.advanced = 0; tcp_rack_detect_loss(sk, &timeout); if (timeout) { - timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN); + timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index f2123075ce6e..addc122f8818 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,10 +15,6 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -struct scalable { - u32 loss_cwnd; -}; - static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -36,23 +32,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) static u32 tcp_scalable_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - struct scalable *ca = inet_csk_ca(sk); - - ca->loss_cwnd = tp->snd_cwnd; return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); } -static u32 tcp_scalable_cwnd_undo(struct sock *sk) -{ - const struct scalable *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - static struct tcp_congestion_ops tcp_scalable __read_mostly = { .ssthresh = tcp_scalable_ssthresh, - .undo_cwnd = tcp_scalable_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_scalable_cong_avoid, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e906014890b6..655dd8d7f064 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -239,7 +239,6 @@ static int tcp_write_timeout(struct sock *sk) /* Called with BH disabled */ void tcp_delack_timer_handler(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); sk_mem_reclaim_partial(sk); @@ -254,17 +253,6 @@ void tcp_delack_timer_handler(struct sock *sk) } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; - if (!skb_queue_empty(&tp->ucopy.prequeue)) { - struct sk_buff *skb; - - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); - - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb); - - tp->ucopy.memory = 0; - } - if (inet_csk_ack_scheduled(sk)) { if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 76005d4b8dfc..6fcf482d611b 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -30,7 +30,6 @@ struct veno { u32 basertt; /* the min of all Veno rtt measurements seen (in usec) */ u32 inc; /* decide whether to increase cwnd */ u32 diff; /* calculate the diff rate */ - u32 loss_cwnd; /* cwnd when loss occured */ }; /* There are several situations when we must "re-start" Veno: @@ -194,7 +193,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); - veno->loss_cwnd = tp->snd_cwnd; if (veno->diff < beta) /* in "non-congestive state", cut cwnd by 1/5 */ return max(tp->snd_cwnd * 4 / 5, 2U); @@ -203,17 +201,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk) return max(tp->snd_cwnd >> 1U, 2U); } -static u32 tcp_veno_cwnd_undo(struct sock *sk) -{ - const struct veno *veno = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd); -} - static struct tcp_congestion_ops tcp_veno __read_mostly = { .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, - .undo_cwnd = tcp_veno_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_veno_cong_avoid, .pkts_acked = tcp_veno_pkts_acked, .set_state = tcp_veno_state, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index bec9cafbe3f9..e5de84310949 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -154,24 +154,6 @@ static inline void update_rtt_min(struct westwood *w) } /* - * @westwood_fast_bw - * It is called when we are in fast path. In particular it is called when - * header prediction is successful. In such case in fact update is - * straight forward and doesn't need any particular care. - */ -static inline void westwood_fast_bw(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct westwood *w = inet_csk_ca(sk); - - westwood_update_window(sk); - - w->bk += tp->snd_una - w->snd_una; - w->snd_una = tp->snd_una; - update_rtt_min(w); -} - -/* * @westwood_acked_count * This function evaluates cumul_ack for evaluating bk in case of * delayed or partial acks. @@ -223,17 +205,12 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk) static void tcp_westwood_ack(struct sock *sk, u32 ack_flags) { - if (ack_flags & CA_ACK_SLOWPATH) { - struct westwood *w = inet_csk_ca(sk); - - westwood_update_window(sk); - w->bk += westwood_acked_count(sk); + struct westwood *w = inet_csk_ca(sk); - update_rtt_min(w); - return; - } + westwood_update_window(sk); + w->bk += westwood_acked_count(sk); - westwood_fast_bw(sk); + update_rtt_min(w); } static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e6ff99c4bd3b..96e829b2e2fc 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -37,7 +37,6 @@ struct yeah { u32 fast_count; u32 pkts_acked; - u32 loss_cwnd; }; static void tcp_yeah_init(struct sock *sk) @@ -220,22 +219,14 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); - yeah->loss_cwnd = tp->snd_cwnd; return max_t(int, tp->snd_cwnd - reduction, 2); } -static u32 tcp_yeah_cwnd_undo(struct sock *sk) -{ - const struct yeah *yeah = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd); -} - static struct tcp_congestion_ops tcp_yeah __read_mostly = { .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, - .undo_cwnd = tcp_yeah_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_yeah_cong_avoid, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a7c804f73990..cb633884e825 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -380,8 +380,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) static int compute_score(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, unsigned short hnum, int dif, - bool exact_dif) + __be32 daddr, unsigned short hnum, + int dif, int sdif, bool exact_dif) { int score; struct inet_sock *inet; @@ -413,10 +413,15 @@ static int compute_score(struct sock *sk, struct net *net, } if (sk->sk_bound_dev_if || exact_dif) { - if (sk->sk_bound_dev_if != dif) + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + + if (exact_dif && !dev_match) return -1; - score += 4; + if (sk->sk_bound_dev_if && dev_match) + score += 4; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; return score; @@ -436,10 +441,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif, bool exact_dif, - struct udp_hslot *hslot2, - struct sk_buff *skb) + __be32 saddr, __be16 sport, + __be32 daddr, unsigned int hnum, + int dif, int sdif, bool exact_dif, + struct udp_hslot *hslot2, + struct sk_buff *skb) { struct sock *sk, *result; int score, badness, matches = 0, reuseport = 0; @@ -449,7 +455,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, exact_dif); + daddr, hnum, dif, sdif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -477,8 +483,8 @@ static struct sock *udp4_lib_lookup2(struct net *net, * harder than this. -DaveM */ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, - __be16 sport, __be32 daddr, __be16 dport, - int dif, struct udp_table *udptable, struct sk_buff *skb) + __be16 sport, __be32 daddr, __be16 dport, int dif, + int sdif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; unsigned short hnum = ntohs(dport); @@ -496,7 +502,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, goto begin; result = udp4_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, sdif, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; @@ -511,7 +517,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, goto begin; result = udp4_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, sdif, exact_dif, hslot2, skb); } return result; @@ -521,7 +527,7 @@ begin: badness = 0; sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, exact_dif); + daddr, hnum, dif, sdif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -554,7 +560,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - udptable, skb); + inet_sdif(skb), udptable, skb); } struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, @@ -576,7 +582,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct sock *sk; sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, - dif, &udp_table, NULL); + dif, 0, &udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -587,7 +593,7 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup); static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, - int dif, unsigned short hnum) + int dif, int sdif, unsigned short hnum) { struct inet_sock *inet = inet_sk(sk); @@ -597,9 +603,10 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport != rmt_port && inet->inet_dport) || (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || ipv6_only_sock(sk) || - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif)) return false; - if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif)) + if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif)) return false; return true; } @@ -628,8 +635,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable, - NULL); + iph->saddr, uh->source, skb->dev->ifindex, 0, + udptable, NULL); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; /* No socket for error */ @@ -1176,7 +1183,11 @@ static void udp_set_dev_scratch(struct sk_buff *skb) scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->is_linear = !skb_is_nonlinear(skb); #endif - if (likely(!skb->_skb_refdst)) + /* all head states execept sp (dst, sk, nf) are always cleared by + * udp_rcv() and we need to preserve secpath, if present, to eventually + * process IP_CMSG_PASSSEC at recvmsg() time + */ + if (likely(!skb_sec_path(skb))) scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } @@ -1782,13 +1793,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } - /* At recvmsg() time we may access skb->dst or skb->sp depending on - * the IP options and the cmsg flags, elsewhere can we clear all - * pending head states while they are hot in the cache - */ - if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb))) - skb_release_head_state(skb); - rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1956,6 +1960,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); unsigned int offset = offsetof(typeof(*sk), sk_node); int dif = skb->dev->ifindex; + int sdif = inet_sdif(skb); struct hlist_node *node; struct sk_buff *nskb; @@ -1970,7 +1975,7 @@ start_lookup: sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr, - uh->source, saddr, dif, hnum)) + uh->source, saddr, dif, sdif, hnum)) continue; if (!first) { @@ -2160,7 +2165,7 @@ drop: static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, - int dif) + int dif, int sdif) { struct sock *sk, *result; unsigned short hnum = ntohs(loc_port); @@ -2174,7 +2179,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, result = NULL; sk_for_each_rcu(sk, &hslot->head) { if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, - rmt_port, rmt_addr, dif, hnum)) { + rmt_port, rmt_addr, dif, sdif, hnum)) { if (result) return NULL; result = sk; @@ -2191,7 +2196,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, static struct sock *__udp4_lib_demux_lookup(struct net *net, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, - int dif) + int dif, int sdif) { unsigned short hnum = ntohs(loc_port); unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); @@ -2203,7 +2208,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (INET_MATCH(sk, net, acookie, rmt_addr, - loc_addr, ports, dif)) + loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; @@ -2219,6 +2224,7 @@ void udp_v4_early_demux(struct sk_buff *skb) struct sock *sk = NULL; struct dst_entry *dst; int dif = skb->dev->ifindex; + int sdif = inet_sdif(skb); int ours; /* validate the packet */ @@ -2244,10 +2250,11 @@ void udp_v4_early_demux(struct sk_buff *skb) } sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, - uh->source, iph->saddr, dif); + uh->source, iph->saddr, + dif, sdif); } else if (skb->pkt_type == PACKET_HOST) { sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, - uh->source, iph->saddr, dif); + uh->source, iph->saddr, dif, sdif); } if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 4515836d2a3a..d0390d844ac8 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -45,7 +45,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, @@ -53,7 +53,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); #endif if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -182,7 +182,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && @@ -190,7 +190,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_dst[3], req->id.idiag_dport, req->id.idiag_src[3], req->id.idiag_sport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); else sk = __udp6_lib_lookup(net, @@ -198,7 +198,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); } #endif else { diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0932c85b42af..97658bfc1b58 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo, gso_partial; + bool remcsum, need_csum, offload_csum, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; - ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && @@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * outer one so strip the existing checksum feature flags and * instead set the flag based on our outer checksum offload value. */ - if (remcsum || ufo) { + if (remcsum) { features &= ~NETIF_F_CSUM_MASK; if (!need_csum || offload_csum) features |= NETIF_F_HW_CSUM; @@ -189,66 +187,16 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); -static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - __wsum csum; - struct udphdr *uh; - struct iphdr *iph; if (skb->encapsulation && (skb_shinfo(skb)->gso_type & - (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) segs = skb_udp_tunnel_segment(skb, features, false); - goto out; - } - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - - uh = udp_hdr(skb); - iph = ip_hdr(skb); - - uh->check = 0; - csum = skb_checksum(skb, 0, skb->len, 0); - uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* If there is no outer header we can fake a checksum offload - * due to the fact that we have already done the checksum in - * software prior to segmenting the frame. - */ - if (!skb->encap_hdr_csum) - features |= NETIF_F_HW_CSUM; - - /* Fragment the skb. IP headers of the fragments are updated in - * inet_gso_segment() - */ - segs = skb_segment(skb, features); -out: return segs; } @@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv4_offload = { .callbacks = { - .gso_segment = udp4_ufo_fragment, + .gso_segment = udp4_tunnel_segment, .gro_receive = udp4_gro_receive, .gro_complete = udp4_gro_complete, }, diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 58bd39fb14b4..6539ff15e9a3 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -82,7 +82,8 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, struct sock *sk = sock->sk; struct udp_tunnel_info ti; - if (!dev->netdev_ops->ndo_udp_tunnel_add) + if (!dev->netdev_ops->ndo_udp_tunnel_add || + !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; ti.type = type; @@ -93,6 +94,24 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, } EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port); +void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type) +{ + struct sock *sk = sock->sk; + struct udp_tunnel_info ti; + + if (!dev->netdev_ops->ndo_udp_tunnel_del || + !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + + ti.type = type; + ti.sa_family = sk->sk_family; + ti.port = inet_sk(sk)->inet_sport; + + dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); +} +EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port); + /* Notify netdevs that UDP port started listening */ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) { @@ -109,6 +128,8 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) for_each_netdev_rcu(net, dev) { if (!dev->netdev_ops->ndo_udp_tunnel_add) continue; + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + continue; dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); } rcu_read_unlock(); @@ -131,6 +152,8 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) for_each_netdev_rcu(net, dev) { if (!dev->netdev_ops->ndo_udp_tunnel_del) continue; + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + continue; dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); } rcu_read_unlock(); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 71b4ecc195c7..4aefb149fe0a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_tos = iph->tos; } -static inline int xfrm4_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); - - xfrm_garbage_collect_deferred(net); - return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); -} - static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -259,14 +251,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, - .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 48c452959d2c..0d722396dce6 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -311,19 +311,8 @@ config IPV6_SEG6_LWTUNNEL ---help--- Support for encapsulation of packets within an outer IPv6 header and a Segment Routing Header using the lightweight - tunnels mechanism. - - If unsure, say N. - -config IPV6_SEG6_INLINE - bool "IPv6: direct Segment Routing Header insertion " - depends on IPV6_SEG6_LWTUNNEL - ---help--- - Support for direct insertion of the Segment Routing Header, - also known as inline mode. Be aware that direct insertion of - extension headers (as opposed to encapsulation) may break - multiple mechanisms such as PMTUD or IPSec AH. Use this feature - only if you know exactly what you are doing. + tunnels mechanism. Also enable support for advanced local + processing of SRv6 packets based on their active segment. If unsure, say N. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 217e9ff0e24b..10e342363793 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o + udp_offload.o seg6.o fib6_notifier.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -23,7 +23,7 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o ipv6-$(CONFIG_NETLABEL) += calipso.o -ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o +ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3c46e9513a31..640792e1ecb7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3066,7 +3066,7 @@ static void init_loopback(struct net_device *dev) * lo device down, release this obsolete dst and * reallocate a new router for ifa. */ - if (!atomic_read(&sp_ifa->rt->rt6i_ref)) { + if (!sp_ifa->rt->rt6i_node) { ip6_rt_put(sp_ifa->rt); sp_ifa->rt = NULL; } else { @@ -3321,11 +3321,11 @@ static void addrconf_gre_config(struct net_device *dev) static int fixup_permanent_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - /* rt6i_ref == 0 means the host route was removed from the + /* !rt6i_node means the host route was removed from the * FIB, for example, if 'lo' device is taken down. In that * case regenerate the host route. */ - if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) { + if (!ifp->rt || !ifp->rt->rt6i_node) { struct rt6_info *rt, *prev; rt = addrconf_dst_alloc(idev, &ifp->addr, false); @@ -6605,21 +6605,21 @@ int __init addrconf_init(void) rtnl_af_register(&inet6_ops); err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, - NULL); + 0); if (err < 0) goto errout; /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL); - __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL); + __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); + __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, - inet6_dump_ifaddr, NULL); + inet6_dump_ifaddr, 0); __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, - inet6_dump_ifmcaddr, NULL); + inet6_dump_ifmcaddr, 0); __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, - inet6_dump_ifacaddr, NULL); + inet6_dump_ifacaddr, 0); __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, - inet6_netconf_dump_devconf, NULL); + inet6_netconf_dump_devconf, 0); ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 7a428f65c7ec..cea5eb488013 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -593,10 +593,10 @@ out: void __init ipv6_addr_label_rtnl_register(void) { __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, - NULL, NULL); + NULL, 0); __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, - NULL, NULL); + NULL, 0); __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, - ip6addrlbl_dump, NULL); + ip6addrlbl_dump, 0); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a88b5b5b7955..0a7c74049a0c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -210,7 +210,7 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk)); + np->autoflowlabel = ip6_default_np_autolabel(net); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3cec529c6113..95516138e861 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -882,7 +882,7 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, (hops - 1) * sizeof(struct in6_addr)); sr_phdr->segments[0] = **addr_p; - *addr_p = &sr_ihdr->segments[hops - 1]; + *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left]; #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(sr_phdr)) { @@ -1174,7 +1174,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, { struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; - fl6->daddr = srh->segments[srh->first_segment]; + fl6->daddr = srh->segments[srh->segments_left]; break; } default: diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c new file mode 100644 index 000000000000..66a103ef7e86 --- /dev/null +++ b/net/ipv6/fib6_notifier.c @@ -0,0 +1,61 @@ +#include <linux/notifier.h> +#include <linux/socket.h> +#include <linux/kernel.h> +#include <net/net_namespace.h> +#include <net/fib_notifier.h> +#include <net/netns/ipv6.h> +#include <net/ip6_fib.h> + +int call_fib6_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->family = AF_INET6; + return call_fib_notifier(nb, net, event_type, info); +} + +int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->family = AF_INET6; + return call_fib_notifiers(net, event_type, info); +} + +static unsigned int fib6_seq_read(struct net *net) +{ + return fib6_tables_seq_read(net) + fib6_rules_seq_read(net); +} + +static int fib6_dump(struct net *net, struct notifier_block *nb) +{ + int err; + + err = fib6_rules_dump(net, nb); + if (err) + return err; + + return fib6_tables_dump(net, nb); +} + +static const struct fib_notifier_ops fib6_notifier_ops_template = { + .family = AF_INET6, + .fib_seq_read = fib6_seq_read, + .fib_dump = fib6_dump, +}; + +int __net_init fib6_notifier_init(struct net *net) +{ + struct fib_notifier_ops *ops; + + ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv6.notifier_ops = ops; + + return 0; +} + +void __net_exit fib6_notifier_exit(struct net *net) +{ + fib_notifier_ops_unregister(net->ipv6.notifier_ops); +} diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ec849d88a662..b240f24a6e52 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -14,6 +14,7 @@ */ #include <linux/netdevice.h> +#include <linux/notifier.h> #include <linux/export.h> #include <net/fib_rules.h> @@ -29,22 +30,65 @@ struct fib6_rule { u8 tclass; }; -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, - int flags, pol_lookup_t lookup) +static bool fib6_rule_matchall(const struct fib_rule *rule) +{ + struct fib6_rule *r = container_of(rule, struct fib6_rule, common); + + if (r->dst.plen || r->src.plen || r->tclass) + return false; + return fib_rule_matchall(rule); +} + +bool fib6_rule_default(const struct fib_rule *rule) { - struct fib_lookup_arg arg = { - .lookup_ptr = lookup, - .flags = FIB_LOOKUP_NOREF, - }; + if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || + rule->l3mdev) + return false; + if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib6_rule_default); - /* update flow if oif or iif point to device enslaved to l3mdev */ - l3mdev_update_flow(net, flowi6_to_flowi(fl6)); +int fib6_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, AF_INET6); +} - fib_rules_lookup(net->ipv6.fib6_rules_ops, - flowi6_to_flowi(fl6), flags, &arg); +unsigned int fib6_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, AF_INET6); +} - if (arg.result) - return arg.result; +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + int flags, pol_lookup_t lookup) +{ + if (net->ipv6.fib6_has_custom_rules) { + struct fib_lookup_arg arg = { + .lookup_ptr = lookup, + .flags = FIB_LOOKUP_NOREF, + }; + + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(fl6)); + + fib_rules_lookup(net->ipv6.fib6_rules_ops, + flowi6_to_flowi(fl6), flags, &arg); + + if (arg.result) + return arg.result; + } else { + struct rt6_info *rt; + + rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags); + if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) + return &rt->dst; + ip6_rt_put(rt); + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + if (rt->dst.error != -EAGAIN) + return &rt->dst; + ip6_rt_put(rt); + } dst_hold(&net->ipv6.ip6_null_entry->dst); return &net->ipv6.ip6_null_entry->dst; @@ -214,6 +258,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule6->dst.plen = frh->dst_len; rule6->tclass = frh->tos; + net->ipv6.fib6_has_custom_rules = true; err = 0; errout: return err; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b13b8f93079d..b01858f5deb1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -56,7 +56,7 @@ struct sock *__inet6_lookup_established(struct net *net, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, const int sdif) { struct sock *sk; const struct hlist_nulls_node *node; @@ -73,12 +73,12 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) + if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { + if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } @@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, - const int dif, bool exact_dif) + const int dif, const int sdif, bool exact_dif) { int score = -1; @@ -110,9 +110,13 @@ static inline int compute_score(struct sock *sk, struct net *net, score++; } if (sk->sk_bound_dev_if || exact_dif) { - if (sk->sk_bound_dev_if != dif) + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + + if (exact_dif && !dev_match) return -1; - score++; + if (sk->sk_bound_dev_if && dev_match) + score++; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; @@ -126,7 +130,7 @@ struct sock *inet6_lookup_listener(struct net *net, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const unsigned short hnum, const int dif) + const unsigned short hnum, const int dif, const int sdif) { unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; @@ -136,7 +140,7 @@ struct sock *inet6_lookup_listener(struct net *net, u32 phash = 0; sk_for_each(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif, exact_dif); + score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -171,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, bool refcounted; sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - ntohs(dport), dif, &refcounted); + ntohs(dport), dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -187,8 +191,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; - const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); + const int sdif = l3mdev_master_ifindex_by_index(net, dif); + const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); @@ -203,7 +208,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, if (sk2->sk_hash != hash) continue; - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) { + if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, + dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ebb299cf72b7..8c58c7558de0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -33,6 +33,7 @@ #include <net/ndisc.h> #include <net/addrconf.h> #include <net/lwtunnel.h> +#include <net/fib_notifier.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -153,7 +154,7 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } -static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) { int cpu; @@ -176,15 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } - -static void rt6_release(struct rt6_info *rt) -{ - if (atomic_dec_and_test(&rt->rt6i_ref)) { - rt6_free_pcpu(rt); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } -} +EXPORT_SYMBOL_GPL(rt6_free_pcpu); static void fib6_link_table(struct net *net, struct fib6_table *tb) { @@ -302,6 +295,109 @@ static void __net_init fib6_tables_init(struct net *net) #endif +unsigned int fib6_tables_seq_read(struct net *net) +{ + unsigned int h, fib_seq = 0; + + rcu_read_lock(); + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[h]; + struct fib6_table *tb; + + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { + read_lock_bh(&tb->tb6_lock); + fib_seq += tb->fib_seq; + read_unlock_bh(&tb->tb6_lock); + } + } + rcu_read_unlock(); + + return fib_seq; +} + +static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct rt6_info *rt) +{ + struct fib6_entry_notifier_info info = { + .rt = rt, + }; + + return call_fib6_notifier(nb, net, event_type, &info.info); +} + +static int call_fib6_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct rt6_info *rt) +{ + struct fib6_entry_notifier_info info = { + .rt = rt, + }; + + rt->rt6i_table->fib_seq++; + return call_fib6_notifiers(net, event_type, &info.info); +} + +struct fib6_dump_arg { + struct net *net; + struct notifier_block *nb; +}; + +static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg) +{ + if (rt == arg->net->ipv6.ip6_null_entry) + return; + call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); +} + +static int fib6_node_dump(struct fib6_walker *w) +{ + struct rt6_info *rt; + + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) + fib6_rt_dump(rt, w->args); + w->leaf = NULL; + return 0; +} + +static void fib6_table_dump(struct net *net, struct fib6_table *tb, + struct fib6_walker *w) +{ + w->root = &tb->tb6_root; + read_lock_bh(&tb->tb6_lock); + fib6_walk(net, w); + read_unlock_bh(&tb->tb6_lock); +} + +/* Called with rcu_read_lock() */ +int fib6_tables_dump(struct net *net, struct notifier_block *nb) +{ + struct fib6_dump_arg arg; + struct fib6_walker *w; + unsigned int h; + + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return -ENOMEM; + + w->func = fib6_node_dump; + arg.net = net; + arg.nb = nb; + w->args = &arg; + + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[h]; + struct fib6_table *tb; + + hlist_for_each_entry_rcu(tb, head, tb6_hlist) + fib6_table_dump(net, tb, w); + } + + kfree(w); + + return 0; +} + static int fib6_dump_node(struct fib6_walker *w) { int res; @@ -733,8 +829,6 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, } fn = fn->parent; } - /* No more references are possible at this point. */ - BUG_ON(atomic_read(&rt->rt6i_ref) != 1); } } @@ -879,6 +973,8 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); + call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, + rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; @@ -906,6 +1002,8 @@ add: rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); + call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, + rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { @@ -913,6 +1011,7 @@ add: fn->fn_flags |= RTN_RTINFO; } nsiblings = iter->rt6i_nsiblings; + iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); rt6_release(iter); @@ -925,6 +1024,7 @@ add: break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; + iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); rt6_release(iter); nsiblings--; @@ -1459,6 +1559,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fib6_purge_rt(rt, fn, net); + call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); rt6_release(rt); @@ -1839,6 +1940,11 @@ static void fib6_gc_timer_cb(unsigned long arg) static int __net_init fib6_net_init(struct net *net) { size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; + int err; + + err = fib6_notifier_init(net); + if (err) + return err; spin_lock_init(&net->ipv6.fib6_gc_lock); rwlock_init(&net->ipv6.fib6_walker_lock); @@ -1891,6 +1997,7 @@ out_fib_table_hash: out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: + fib6_notifier_exit(net); return -ENOMEM; } @@ -1907,6 +2014,7 @@ static void fib6_net_exit(struct net *net) kfree(net->ipv6.fib6_main_tbl); kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); + fib6_notifier_exit(net); } static struct pernet_operations fib6_net_ops = { @@ -1930,7 +2038,7 @@ int __init fib6_init(void) goto out_kmem_cache_create; ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - NULL); + 0); if (ret) goto out_unregister_subsys; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2dfe50d8d609..43ca864327c7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1110,69 +1110,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -static inline int ip6_ufo_append_data(struct sock *sk, - struct sk_buff_head *queue, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int hh_len, int fragheaderlen, - int exthdrlen, int transhdrlen, int mtu, - unsigned int flags, const struct flowi6 *fl6) - -{ - struct sk_buff *skb; - int err; - - /* There is support for UDP large send offload by network - * device, so create one single skb packet containing complete - * udp datagram - */ - skb = skb_peek_tail(queue); - if (!skb) { - skb = sock_alloc_send_skb(sk, - hh_len + fragheaderlen + transhdrlen + 20, - (flags & MSG_DONTWAIT), &err); - if (!skb) - return err; - - /* reserve space for Hardware header */ - skb_reserve(skb, hh_len); - - /* create space for UDP/IP header */ - skb_put(skb, fragheaderlen + transhdrlen); - - /* initialize network header pointer */ - skb_set_network_header(skb, exthdrlen); - - /* initialize protocol header pointer */ - skb->transport_header = skb->network_header + fragheaderlen; - - skb->protocol = htons(ETH_P_IPV6); - skb->csum = 0; - - if (flags & MSG_CONFIRM) - skb_set_dst_pending_confirm(skb, 1); - - __skb_queue_tail(queue, skb); - } else if (skb_is_gso(skb)) { - goto append; - } - - skb->ip_summed = CHECKSUM_PARTIAL; - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), - &fl6->daddr, - &fl6->saddr); - -append: - return skb_append_datato_frags(sk, skb, getfrag, from, - (length - transhdrlen)); -} - static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { @@ -1381,20 +1318,6 @@ emsgsize: */ cork->length += length; - if ((skb && skb_is_gso(skb)) || - (((length + (skb ? skb->len : headersize)) > mtu) && - (skb_queue_len(queue) <= 1) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) { - err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, exthdrlen, - transhdrlen, mtu, flags, fl6); - if (err) - goto error; - return 0; - } - if (!skb) goto alloc_new_skb; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 486c2305f53c..79444a4bfd6d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; -static bool is_vti6_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti6_netdev_ops; -} - -static int vti6_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip6_tnl *t = netdev_priv(dev); - - if (!is_vti6_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(t->net, dev_net(dev))) - xfrm_garbage_collect(t->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti6_notifier_block __read_mostly = { - .notifier_call = vti6_device_event, -}; - /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void) const char *msg; int err; - register_netdevice_notifier(&vti6_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) @@ -1216,7 +1187,6 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti6_notifier_block); pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void) xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); - unregister_netdevice_notifier(&vti6_notifier_block); } module_init(vti6_tunnel_init); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7454850f2098..f5500f5444e9 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1427,7 +1427,7 @@ int __init ip6_mr_init(void) } #endif rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, - ip6mr_rtm_dumproute, NULL); + ip6mr_rtm_dumproute, 0); return 0; #ifdef CONFIG_IPV6_PIMSM_V2 add_proto_fail: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0327c1f2e6fc..5e338eb89509 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1779,6 +1779,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, static struct notifier_block ndisc_netdev_notifier = { .notifier_call = ndisc_netdev_event, + .priority = ADDRCONF_NOTIFY_PRIORITY - 5, }; #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 60be012fe708..e4462b0ff801 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(raw_v6_hashinfo); struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif) + const struct in6_addr *rmt_addr, int dif, int sdif) { bool is_multicast = ipv6_addr_is_multicast(loc_addr); @@ -86,7 +86,9 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif) continue; if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { @@ -178,7 +180,8 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) goto out; net = dev_net(skb->dev); - sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb)); + sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, + inet6_iif(skb), inet6_sdif(skb)); while (sk) { int filtered; @@ -222,7 +225,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) } } sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, - inet6_iif(skb)); + inet6_iif(skb), inet6_sdif(skb)); } out: read_unlock(&raw_v6_hashinfo.lock); @@ -378,7 +381,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, net = dev_net(skb->dev); while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, - inet6_iif(skb)))) { + inet6_iif(skb), inet6_iif(skb)))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a640fbcba15d..035762fed07d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1820,6 +1820,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } + if (cfg->fc_flags & RTF_OFFLOAD) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; @@ -3330,6 +3335,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, goto nla_put_failure; } + if (rt->rt6i_flags & RTF_OFFLOAD) + *flags |= RTNH_F_OFFLOAD; + /* not needed for multipath encoding b/c it has a rtnexthop struct */ if (!skip_oif && rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) @@ -3921,6 +3929,7 @@ static int __net_init ip6_route_net_init(struct net *net) ip6_template_metrics, true); #ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.fib6_has_custom_rules = false; net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, sizeof(*net->ipv6.ip6_prohibit_entry), GFP_KERNEL); @@ -4096,9 +4105,9 @@ int __init ip6_route_init(void) goto fib6_rules_init; ret = -ENOBUFS; - if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) || + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, 0)) goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 15fba55e3da8..c81407770956 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -40,7 +40,7 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) if (((srh->hdrlen + 1) << 3) != len) return false; - if (srh->segments_left != srh->first_segment) + if (srh->segments_left > srh->first_segment) return false; tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); @@ -456,6 +456,10 @@ int __init seg6_init(void) err = seg6_iptunnel_init(); if (err) goto out_unregister_pernet; + + err = seg6_local_init(); + if (err) + goto out_unregister_pernet; #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -471,6 +475,7 @@ out: #ifdef CONFIG_IPV6_SEG6_HMAC out_unregister_iptun: #ifdef CONFIG_IPV6_SEG6_LWTUNNEL + seg6_local_exit(); seg6_iptunnel_exit(); #endif #endif diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 264d772d3c7d..501233040570 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev, } /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ -static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) { struct net *net = dev_net(skb_dst(skb)->dev); struct ipv6hdr *hdr, *inner_hdr; @@ -141,10 +141,10 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) return 0; } +EXPORT_SYMBOL_GPL(seg6_do_srh_encap); /* insert an SRH within an IPv6 packet, just after the IPv6 header */ -#ifdef CONFIG_IPV6_SEG6_INLINE -static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) { struct ipv6hdr *hdr, *oldhdr; struct ipv6_sr_hdr *isrh; @@ -193,7 +193,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) return 0; } -#endif +EXPORT_SYMBOL_GPL(seg6_do_srh_inline); static int seg6_do_srh(struct sk_buff *skb) { @@ -209,12 +209,10 @@ static int seg6_do_srh(struct sk_buff *skb) } switch (tinfo->mode) { -#ifdef CONFIG_IPV6_SEG6_INLINE case SEG6_IPTUN_MODE_INLINE: err = seg6_do_srh_inline(skb, tinfo->srh); skb_reset_inner_headers(skb); break; -#endif case SEG6_IPTUN_MODE_ENCAP: err = seg6_do_srh_encap(skb, tinfo->srh); break; @@ -357,10 +355,8 @@ static int seg6_build_state(struct nlattr *nla, return -EINVAL; switch (tuninfo->mode) { -#ifdef CONFIG_IPV6_SEG6_INLINE case SEG6_IPTUN_MODE_INLINE: break; -#endif case SEG6_IPTUN_MODE_ENCAP: break; default: diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c new file mode 100644 index 000000000000..147680e7a00c --- /dev/null +++ b/net/ipv6/seg6_local.c @@ -0,0 +1,766 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/lwtunnel.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include <net/ip6_fib.h> +#include <net/route.h> +#include <net/seg6.h> +#include <linux/seg6.h> +#include <linux/seg6_local.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> +#include <net/dst_cache.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif + +struct seg6_local_lwt; + +struct seg6_action_desc { + int action; + unsigned long attrs; + int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt); + int static_headroom; +}; + +struct seg6_local_lwt { + int action; + struct ipv6_sr_hdr *srh; + int table; + struct in_addr nh4; + struct in6_addr nh6; + int iif; + int oif; + + int headroom; + struct seg6_action_desc *desc; +}; + +static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct seg6_local_lwt *)lwt->data; +} + +static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *srh; + struct ipv6hdr *hdr; + int len; + + hdr = ipv6_hdr(skb); + if (hdr->nexthdr != IPPROTO_ROUTING) + return NULL; + + srh = (struct ipv6_sr_hdr *)(hdr + 1); + len = (srh->hdrlen + 1) << 3; + + if (!pskb_may_pull(skb, sizeof(*hdr) + len)) + return NULL; + + if (!seg6_validate_srh(srh, len)) + return NULL; + + return srh; +} + +static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *srh; + + srh = get_srh(skb); + if (!srh) + return NULL; + + if (srh->segments_left == 0) + return NULL; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (!seg6_hmac_validate_skb(skb)) + return NULL; +#endif + + return srh; +} + +/* regular endpoint function */ +static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + struct in6_addr *addr; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + srh->segments_left--; + addr = srh->segments + srh->segments_left; + + ipv6_hdr(skb)->daddr = *addr; + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +/* regular endpoint, and forward to specified nexthop */ +static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct net *net = dev_net(skb->dev); + struct ipv6_sr_hdr *srh; + struct dst_entry *dst; + struct in6_addr *addr; + struct ipv6hdr *hdr; + struct flowi6 fl6; + int flags; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + srh->segments_left--; + addr = srh->segments + srh->segments_left; + + hdr = ipv6_hdr(skb); + hdr->daddr = *addr; + + skb_dst_drop(skb); + + fl6.flowi6_iif = skb->dev->ifindex; + fl6.daddr = slwt->nh6; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE | + RT6_LOOKUP_F_REACHABLE; + + dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); + if (dst->dev->flags & IFF_LOOPBACK) + goto drop; + + skb_dst_set(skb, dst); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +/* decapsulate and forward to specified nexthop */ +static int input_action_end_dx6(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct net *net = dev_net(skb->dev); + struct ipv6hdr *inner_hdr; + struct ipv6_sr_hdr *srh; + struct dst_entry *dst; + unsigned int off = 0; + struct flowi6 fl6; + bool use_nh; + int flags; + + /* this function accepts IPv6 encapsulated packets, with either + * an SRH with SL=0, or no SRH. + */ + + srh = get_srh(skb); + if (srh && srh->segments_left > 0) + goto drop; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (srh && !seg6_hmac_validate_skb(skb)) + goto drop; +#endif + + if (ipv6_find_hdr(skb, &off, IPPROTO_IPV6, NULL, NULL) < 0) + goto drop; + + if (!pskb_pull(skb, off)) + goto drop; + + skb_postpull_rcsum(skb, skb_network_header(skb), off); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + inner_hdr = ipv6_hdr(skb); + + /* The inner packet is not associated to any local interface, + * so we do not call netif_rx(). + * + * If slwt->nh6 is set to ::, then lookup the nexthop for the + * inner packet's DA. Otherwise, use the specified nexthop. + */ + + use_nh = !ipv6_addr_any(&slwt->nh6); + + skb_dst_drop(skb); + + fl6.flowi6_iif = skb->dev->ifindex; + fl6.daddr = use_nh ? slwt->nh6 : inner_hdr->daddr; + fl6.saddr = inner_hdr->saddr; + fl6.flowlabel = ip6_flowinfo(inner_hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = inner_hdr->nexthdr; + + flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_REACHABLE; + if (use_nh) + flags |= RT6_LOOKUP_F_IFACE; + + dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); + if (dst->dev->flags & IFF_LOOPBACK) + goto drop; + + skb_dst_set(skb, dst); + + return dst_input(skb); +drop: + kfree_skb(skb); + return -EINVAL; +} + +/* push an SRH on top of the current one */ +static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + int err = -EINVAL; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + err = seg6_do_srh_inline(skb, slwt->srh); + if (err) + goto drop; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return err; +} + +/* encapsulate within an outer IPv6 header and a specified SRH */ +static int input_action_end_b6_encap(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + struct in6_addr *addr; + int err = -EINVAL; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + srh->segments_left--; + addr = srh->segments + srh->segments_left; + ipv6_hdr(skb)->daddr = *addr; + + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + + err = seg6_do_srh_encap(skb, slwt->srh); + if (err) + goto drop; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return err; +} + +static struct seg6_action_desc seg6_action_table[] = { + { + .action = SEG6_LOCAL_ACTION_END, + .attrs = 0, + .input = input_action_end, + }, + { + .action = SEG6_LOCAL_ACTION_END_X, + .attrs = (1 << SEG6_LOCAL_NH6), + .input = input_action_end_x, + }, + { + .action = SEG6_LOCAL_ACTION_END_DX6, + .attrs = (1 << SEG6_LOCAL_NH6), + .input = input_action_end_dx6, + }, + { + .action = SEG6_LOCAL_ACTION_END_B6, + .attrs = (1 << SEG6_LOCAL_SRH), + .input = input_action_end_b6, + }, + { + .action = SEG6_LOCAL_ACTION_END_B6_ENCAP, + .attrs = (1 << SEG6_LOCAL_SRH), + .input = input_action_end_b6_encap, + .static_headroom = sizeof(struct ipv6hdr), + } +}; + +static struct seg6_action_desc *__get_action_desc(int action) +{ + struct seg6_action_desc *desc; + int i, count; + + count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc); + for (i = 0; i < count; i++) { + desc = &seg6_action_table[i]; + if (desc->action == action) + return desc; + } + + return NULL; +} + +static int seg6_local_input(struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct seg6_action_desc *desc; + struct seg6_local_lwt *slwt; + + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); + desc = slwt->desc; + + return desc->input(skb, slwt); +} + +static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { + [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, + [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, + [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, + [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, + .len = sizeof(struct in_addr) }, + [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [SEG6_LOCAL_IIF] = { .type = NLA_U32 }, + [SEG6_LOCAL_OIF] = { .type = NLA_U32 }, +}; + +static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + int len; + + srh = nla_data(attrs[SEG6_LOCAL_SRH]); + len = nla_len(attrs[SEG6_LOCAL_SRH]); + + /* SRH must contain at least one segment */ + if (len < sizeof(*srh) + sizeof(struct in6_addr)) + return -EINVAL; + + if (!seg6_validate_srh(srh, len)) + return -EINVAL; + + slwt->srh = kmalloc(len, GFP_KERNEL); + if (!slwt->srh) + return -ENOMEM; + + memcpy(slwt->srh, srh, len); + + slwt->headroom += len; + + return 0; +} + +static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + struct nlattr *nla; + int len; + + srh = slwt->srh; + len = (srh->hdrlen + 1) << 3; + + nla = nla_reserve(skb, SEG6_LOCAL_SRH, len); + if (!nla) + return -EMSGSIZE; + + memcpy(nla_data(nla), srh, len); + + return 0; +} + +static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + int len = (a->srh->hdrlen + 1) << 3; + + if (len != ((b->srh->hdrlen + 1) << 3)) + return 1; + + return memcmp(a->srh, b->srh, len); +} + +static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); + + return 0; +} + +static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + if (a->table != b->table) + return 1; + + return 0; +} + +static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), + sizeof(struct in_addr)); + + return 0; +} + +static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct nlattr *nla; + + nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr)); + if (!nla) + return -EMSGSIZE; + + memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr)); + + return 0; +} + +static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr)); +} + +static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]), + sizeof(struct in6_addr)); + + return 0; +} + +static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct nlattr *nla; + + nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr)); + if (!nla) + return -EMSGSIZE; + + memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr)); + + return 0; +} + +static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr)); +} + +static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]); + + return 0; +} + +static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + if (a->iif != b->iif) + return 1; + + return 0; +} + +static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]); + + return 0; +} + +static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + if (a->oif != b->oif) + return 1; + + return 0; +} + +struct seg6_action_param { + int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); + int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); + int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b); +}; + +static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { + [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh, + .put = put_nla_srh, + .cmp = cmp_nla_srh }, + + [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table, + .put = put_nla_table, + .cmp = cmp_nla_table }, + + [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4, + .put = put_nla_nh4, + .cmp = cmp_nla_nh4 }, + + [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6, + .put = put_nla_nh6, + .cmp = cmp_nla_nh6 }, + + [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif, + .put = put_nla_iif, + .cmp = cmp_nla_iif }, + + [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif, + .put = put_nla_oif, + .cmp = cmp_nla_oif }, +}; + +static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + struct seg6_action_param *param; + struct seg6_action_desc *desc; + int i, err; + + desc = __get_action_desc(slwt->action); + if (!desc) + return -EINVAL; + + if (!desc->input) + return -EOPNOTSUPP; + + slwt->desc = desc; + slwt->headroom += desc->static_headroom; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { + if (desc->attrs & (1 << i)) { + if (!attrs[i]) + return -EINVAL; + + param = &seg6_action_params[i]; + + err = param->parse(attrs, slwt); + if (err < 0) + return err; + } + } + + return 0; +} + +static int seg6_local_build_state(struct nlattr *nla, unsigned int family, + const void *cfg, struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[SEG6_LOCAL_MAX + 1]; + struct lwtunnel_state *newts; + struct seg6_local_lwt *slwt; + int err; + + err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy, + extack); + + if (err < 0) + return err; + + if (!tb[SEG6_LOCAL_ACTION]) + return -EINVAL; + + newts = lwtunnel_state_alloc(sizeof(*slwt)); + if (!newts) + return -ENOMEM; + + slwt = seg6_local_lwtunnel(newts); + slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]); + + err = parse_nla_action(tb, slwt); + if (err < 0) + goto out_free; + + newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL; + newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT; + newts->headroom = slwt->headroom; + + *ts = newts; + + return 0; + +out_free: + kfree(slwt->srh); + kfree(newts); + return err; +} + +static void seg6_local_destroy_state(struct lwtunnel_state *lwt) +{ + struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); + + kfree(slwt->srh); +} + +static int seg6_local_fill_encap(struct sk_buff *skb, + struct lwtunnel_state *lwt) +{ + struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); + struct seg6_action_param *param; + int i, err; + + if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action)) + return -EMSGSIZE; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { + if (slwt->desc->attrs & (1 << i)) { + param = &seg6_action_params[i]; + err = param->put(skb, slwt); + if (err < 0) + return err; + } + } + + return 0; +} + +static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) +{ + struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); + unsigned long attrs; + int nlsize; + + nlsize = nla_total_size(4); /* action */ + + attrs = slwt->desc->attrs; + + if (attrs & (1 << SEG6_LOCAL_SRH)) + nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); + + if (attrs & (1 << SEG6_LOCAL_TABLE)) + nlsize += nla_total_size(4); + + if (attrs & (1 << SEG6_LOCAL_NH4)) + nlsize += nla_total_size(4); + + if (attrs & (1 << SEG6_LOCAL_NH6)) + nlsize += nla_total_size(16); + + if (attrs & (1 << SEG6_LOCAL_IIF)) + nlsize += nla_total_size(4); + + if (attrs & (1 << SEG6_LOCAL_OIF)) + nlsize += nla_total_size(4); + + return nlsize; +} + +static int seg6_local_cmp_encap(struct lwtunnel_state *a, + struct lwtunnel_state *b) +{ + struct seg6_local_lwt *slwt_a, *slwt_b; + struct seg6_action_param *param; + int i; + + slwt_a = seg6_local_lwtunnel(a); + slwt_b = seg6_local_lwtunnel(b); + + if (slwt_a->action != slwt_b->action) + return 1; + + if (slwt_a->desc->attrs != slwt_b->desc->attrs) + return 1; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { + if (slwt_a->desc->attrs & (1 << i)) { + param = &seg6_action_params[i]; + if (param->cmp(slwt_a, slwt_b)) + return 1; + } + } + + return 0; +} + +static const struct lwtunnel_encap_ops seg6_local_ops = { + .build_state = seg6_local_build_state, + .destroy_state = seg6_local_destroy_state, + .input = seg6_local_input, + .fill_encap = seg6_local_fill_encap, + .get_encap_size = seg6_local_get_encap_size, + .cmp_encap = seg6_local_cmp_encap, + .owner = THIS_MODULE, +}; + +int __init seg6_local_init(void) +{ + return lwtunnel_encap_add_ops(&seg6_local_ops, + LWTUNNEL_ENCAP_SEG6_LOCAL); +} + +void seg6_local_exit(void) +{ + lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL); +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2521690d62d6..f776ec4ecf6d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -350,7 +350,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __inet6_lookup_established(net, &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, ntohs(th->source), - skb->dev->ifindex); + skb->dev->ifindex, inet6_sdif(skb)); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), @@ -918,7 +918,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) &tcp_hashinfo, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, - ntohs(th->source), tcp_v6_iif(skb)); + ntohs(th->source), tcp_v6_iif(skb), + tcp_v6_sdif(skb)); if (!sk1) goto out; @@ -1296,7 +1297,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); + tcp_rcv_established(sk, skb, tcp_hdr(skb)); if (opt_skb) goto ipv6_pktoptions; return 0; @@ -1397,6 +1398,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, static int tcp_v6_rcv(struct sk_buff *skb) { + int sdif = inet6_sdif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; bool refcounted; @@ -1430,7 +1432,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) lookup: sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), - th->source, th->dest, inet6_iif(skb), + th->source, th->dest, inet6_iif(skb), sdif, &refcounted); if (!sk) goto no_tcp_socket; @@ -1505,8 +1507,7 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v6_do_rcv(sk, skb); + ret = tcp_v6_do_rcv(sk, skb); } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } @@ -1564,7 +1565,8 @@ do_time_wait: skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), tcp_v6_iif(skb)); + ntohs(th->dest), tcp_v6_iif(skb), + sdif); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule_put(tw); @@ -1611,7 +1613,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), - inet6_iif(skb)); + inet6_iif(skb), inet6_sdif(skb)); if (sk) { skb->sk = sk; skb->destructor = sock_edemux; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 578142b7ca3e..19afcaf4a22e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -129,7 +129,7 @@ static void udp_v6_rehash(struct sock *sk) static int compute_score(struct sock *sk, struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, - int dif, bool exact_dif) + int dif, int sdif, bool exact_dif) { int score; struct inet_sock *inet; @@ -161,9 +161,13 @@ static int compute_score(struct sock *sk, struct net *net, } if (sk->sk_bound_dev_if || exact_dif) { - if (sk->sk_bound_dev_if != dif) + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + + if (exact_dif && !dev_match) return -1; - score++; + if (sk->sk_bound_dev_if && dev_match) + score++; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) @@ -175,9 +179,9 @@ static int compute_score(struct sock *sk, struct net *net, /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned int hnum, int dif, - bool exact_dif, struct udp_hslot *hslot2, - struct sk_buff *skb) + const struct in6_addr *daddr, unsigned int hnum, + int dif, int sdif, bool exact_dif, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; int score, badness, matches = 0, reuseport = 0; @@ -187,7 +191,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, exact_dif); + daddr, hnum, dif, sdif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -214,10 +218,10 @@ static struct sock *udp6_lib_lookup2(struct net *net, /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *udptable, - struct sk_buff *skb) + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *udptable, + struct sk_buff *skb) { struct sock *sk, *result; unsigned short hnum = ntohs(dport); @@ -235,7 +239,7 @@ struct sock *__udp6_lib_lookup(struct net *net, goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, exact_dif, + daddr, hnum, dif, sdif, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; @@ -250,7 +254,7 @@ struct sock *__udp6_lib_lookup(struct net *net, goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, sdif, exact_dif, hslot2, skb); } @@ -261,7 +265,7 @@ begin: badness = -1; sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, - exact_dif); + sdif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -294,7 +298,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - udptable, skb); + inet6_sdif(skb), udptable, skb); } struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, @@ -304,7 +308,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - &udp_table, skb); + inet6_sdif(skb), &udp_table, skb); } EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); @@ -320,7 +324,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be struct sock *sk; sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, - dif, &udp_table, NULL); + dif, 0, &udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -501,7 +505,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), udptable, skb); + inet6_iif(skb), 0, udptable, skb); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -893,7 +897,7 @@ discard: static struct sock *__udp6_lib_demux_lookup(struct net *net, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, - int dif) + int dif, int sdif) { unsigned short hnum = ntohs(loc_port); unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum); @@ -904,7 +908,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (sk->sk_state == TCP_ESTABLISHED && - INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif)) + INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; @@ -919,6 +923,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) struct sock *sk; struct dst_entry *dst; int dif = skb->dev->ifindex; + int sdif = inet6_sdif(skb); if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) @@ -930,7 +935,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) sk = __udp6_lib_demux_lookup(net, uh->dest, &ipv6_hdr(skb)->daddr, uh->source, &ipv6_hdr(skb)->saddr, - dif); + dif, sdif); else return; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e7d378c032cb..455fd4e39333 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -17,109 +17,15 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - unsigned int unfrag_ip6hlen, unfrag_len; - struct frag_hdr *fptr; - u8 *packet_start, *prevhdr; - u8 nexthdr; - u8 frag_hdr_sz = sizeof(struct frag_hdr); - __wsum csum; - int tnl_hlen; - int err; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - /* Set the IPv6 fragment id if not set yet */ - if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(dev_net(skb->dev), skb); - - segs = NULL; - goto out; - } if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); - else { - const struct ipv6hdr *ipv6h; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - - uh = udp_hdr(skb); - ipv6h = ipv6_hdr(skb); - - uh->check = 0; - csum = skb_checksum(skb, 0, skb->len, 0); - uh->check = udp_v6_check(skb->len, &ipv6h->saddr, - &ipv6h->daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* If there is no outer header we can fake a checksum offload - * due to the fact that we have already done the checksum in - * software prior to segmenting the frame. - */ - if (!skb->encap_hdr_csum) - features |= NETIF_F_HW_CSUM; - - /* Check if there is enough headroom to insert fragment header. */ - tnl_hlen = skb_tnl_header_len(skb); - if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { - if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) - goto out; - } - - /* Find the unfragmentable header and shift it left by frag_hdr_sz - * bytes to insert fragment header. - */ - err = ip6_find_1stfragopt(skb, &prevhdr); - if (err < 0) - return ERR_PTR(err); - unfrag_ip6hlen = err; - nexthdr = *prevhdr; - *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + - unfrag_ip6hlen + tnl_hlen; - packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; - memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); - - SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; - skb->mac_header -= frag_hdr_sz; - skb->network_header -= frag_hdr_sz; - - fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); - fptr->nexthdr = nexthdr; - fptr->reserved = 0; - if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(dev_net(skb->dev), skb); - fptr->identification = skb_shinfo(skb)->ip6_frag_id; - - /* Fragment the skb. ipv6 header and the remaining fields of the - * fragment header are updated in ipv6_gso_segment() - */ - segs = skb_segment(skb, features); - } -out: return segs; } @@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv6_offload = { .callbacks = { - .gso_segment = udp6_ufo_fragment, + .gso_segment = udp6_tunnel_segment, .gro_receive = udp6_gro_receive, .gro_complete = udp6_gro_complete, }, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 79651bc71bf0..f44b25a48478 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } } -static inline int xfrm6_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - - xfrm_garbage_collect_deferred(net); - return dst_entries_get_fast(ops) > ops->gc_thresh * 2; -} - static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -279,14 +271,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, - .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index c343ac60bf50..c748e8a6a72c 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, - psock->strp.stats.rx_msgs, - psock->strp.stats.rx_bytes, + psock->strp.stats.msgs, + psock->strp.stats.bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, @@ -170,22 +170,22 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, if (psock->tx_stopped) seq_puts(seq, "TxStop "); - if (psock->strp.rx_stopped) + if (psock->strp.stopped) seq_puts(seq, "RxStop "); if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); - if (!psock->strp.rx_paused && !psock->ready_rx_msg) { + if (!psock->strp.paused && !psock->ready_rx_msg) { if (psock->sk->sk_receive_queue.qlen) { - if (psock->strp.rx_need_bytes) + if (psock->strp.need_bytes) seq_printf(seq, "RxWait=%u ", - psock->strp.rx_need_bytes); + psock->strp.need_bytes); else seq_printf(seq, "RxWait "); } } else { - if (psock->strp.rx_paused) + if (psock->strp.paused) seq_puts(seq, "RxPause "); if (psock->ready_rx_msg) @@ -371,20 +371,20 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", - strp_stats.rx_msgs, - strp_stats.rx_bytes, + strp_stats.msgs, + strp_stats.bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, - strp_stats.rx_aborts, - strp_stats.rx_interrupted, - strp_stats.rx_unrecov_intr, - strp_stats.rx_mem_fail, - strp_stats.rx_need_more_hdr, - strp_stats.rx_bad_hdr_len, - strp_stats.rx_msg_too_big, - strp_stats.rx_msg_timeouts, + strp_stats.aborts, + strp_stats.interrupted, + strp_stats.unrecov_intr, + strp_stats.mem_fail, + strp_stats.need_more_hdr, + strp_stats.bad_hdr_len, + strp_stats.msg_too_big, + strp_stats.msg_timeouts, psock_stats.tx_aborts); return 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index da49191f7ad0..88ce73288247 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -96,12 +96,12 @@ static void kcm_update_rx_mux_stats(struct kcm_mux *mux, struct kcm_psock *psock) { STRP_STATS_ADD(mux->stats.rx_bytes, - psock->strp.stats.rx_bytes - + psock->strp.stats.bytes - psock->saved_rx_bytes); mux->stats.rx_msgs += - psock->strp.stats.rx_msgs - psock->saved_rx_msgs; - psock->saved_rx_msgs = psock->strp.stats.rx_msgs; - psock->saved_rx_bytes = psock->strp.stats.rx_bytes; + psock->strp.stats.msgs - psock->saved_rx_msgs; + psock->saved_rx_msgs = psock->strp.stats.msgs; + psock->saved_rx_bytes = psock->strp.stats.bytes; } static void kcm_update_tx_mux_stats(struct kcm_mux *mux, @@ -1118,7 +1118,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, struct kcm_sock *kcm = kcm_sk(sk); int err = 0; long timeo; - struct strp_rx_msg *rxm; + struct strp_msg *stm; int copied = 0; struct sk_buff *skb; @@ -1132,26 +1132,26 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Okay, have a message on the receive queue */ - rxm = strp_rx_msg(skb); + stm = strp_msg(skb); - if (len > rxm->full_len) - len = rxm->full_len; + if (len > stm->full_len) + len = stm->full_len; - err = skb_copy_datagram_msg(skb, rxm->offset, msg, len); + err = skb_copy_datagram_msg(skb, stm->offset, msg, len); if (err < 0) goto out; copied = len; if (likely(!(flags & MSG_PEEK))) { KCM_STATS_ADD(kcm->stats.rx_bytes, copied); - if (copied < rxm->full_len) { + if (copied < stm->full_len) { if (sock->type == SOCK_DGRAM) { /* Truncated message */ msg->msg_flags |= MSG_TRUNC; goto msg_finished; } - rxm->offset += copied; - rxm->full_len -= copied; + stm->offset += copied; + stm->full_len -= copied; } else { msg_finished: /* Finished with message */ @@ -1175,7 +1175,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); long timeo; - struct strp_rx_msg *rxm; + struct strp_msg *stm; int err = 0; ssize_t copied; struct sk_buff *skb; @@ -1192,12 +1192,12 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Okay, have a message on the receive queue */ - rxm = strp_rx_msg(skb); + stm = strp_msg(skb); - if (len > rxm->full_len) - len = rxm->full_len; + if (len > stm->full_len) + len = stm->full_len; - copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags); + copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags); if (copied < 0) { err = copied; goto err_out; @@ -1205,8 +1205,8 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, KCM_STATS_ADD(kcm->stats.rx_bytes, copied); - rxm->offset += copied; - rxm->full_len -= copied; + stm->offset += copied; + stm->full_len -= copied; /* We have no way to return MSG_EOR. If all the bytes have been * read we still leave the message in the receive socket buffer. diff --git a/net/key/af_key.c b/net/key/af_key.c index ca9d3ae665e7..10d7133e4fe9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa out: xfrm_pol_put(xp); - if (err == 0) - xfrm_garbage_collect(net); return err; } @@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); - if (!err) - xfrm_garbage_collect(net); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index ea4f481839dd..c5b9ce41d66f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2479,12 +2479,12 @@ static int __init mpls_init(void) rtnl_af_register(&mpls_af_ops); - rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); - rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); + rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0); + rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0); rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, - NULL); + 0); rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, - mpls_netconf_dump_devconf, NULL); + mpls_netconf_dump_devconf, 0); err = 0; out: return err; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index d767e35fff6b..ade4c10c28c6 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -125,7 +125,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, __tcp_hdrlen(tcph), saddr, sport, daddr, dport, - in->ifindex); + in->ifindex, 0); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -195,7 +195,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, thoff + __tcp_hdrlen(tcph), saddr, sport, daddr, ntohs(dport), - in->ifindex); + in->ifindex, 0); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -208,7 +208,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, case NFT_LOOKUP_ESTABLISHED: sk = __inet6_lookup_established(net, &tcp_hashinfo, saddr, sport, daddr, ntohs(dport), - in->ifindex); + in->ifindex, 0); break; default: BUG(); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 3f6c4fa78bdb..245fa350a7a8 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS -static const struct file_operations recent_old_fops, recent_mt_fops; +static const struct file_operations recent_mt_fops; #endif static u_int32_t hash_rnd __read_mostly; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 45fe8c8a884d..f6e229b51dfb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -335,8 +335,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { - unsigned short gso_type = skb_shinfo(skb)->gso_type; - struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; @@ -347,21 +345,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, if (segs == NULL) return -EINVAL; - if (gso_type & SKB_GSO_UDP) { - /* The initial flow key extracted by ovs_flow_key_extract() - * in this case is for a first fragment, so we need to - * properly mark later fragments. - */ - later_key = *key; - later_key.ip.frag = OVS_FRAG_TYPE_LATER; - } - /* Queue all of the segments. */ skb = segs; do { - if (gso_type & SKB_GSO_UDP && skb != segs) - key = &later_key; - err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); if (err) break; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 3f76cb765e5b..8c94cef25a72 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -72,8 +72,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, const struct sk_buff *skb) { struct flow_stats *stats; - int node = numa_node_id(); - int cpu = smp_processor_id(); + unsigned int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[cpu]); @@ -108,7 +107,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, __GFP_THISNODE | __GFP_NOWARN | __GFP_NOMEMALLOC, - node); + numa_node_id()); if (likely(new_stats)) { new_stats->used = jiffies; new_stats->packet_count = 1; @@ -118,6 +117,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, rcu_assign_pointer(flow->stats[cpu], new_stats); + cpumask_set_cpu(cpu, &flow->cpu_used_mask); goto unlock; } } @@ -145,7 +145,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow, memset(ovs_stats, 0, sizeof(*ovs_stats)); /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { @@ -169,7 +169,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow) int cpu; /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { @@ -584,8 +584,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) key->ip.frag = OVS_FRAG_TYPE_LATER; return 0; } - if (nh->frag_off & htons(IP_MF) || - skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + if (nh->frag_off & htons(IP_MF)) key->ip.frag = OVS_FRAG_TYPE_FIRST; else key->ip.frag = OVS_FRAG_TYPE_NONE; @@ -701,9 +700,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (key->ip.frag == OVS_FRAG_TYPE_LATER) return 0; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - key->ip.frag = OVS_FRAG_TYPE_FIRST; - /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a9bc1c875965..1875bba4f865 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -31,6 +31,7 @@ #include <linux/jiffies.h> #include <linux/time.h> #include <linux/flex_array.h> +#include <linux/cpumask.h> #include <net/inet_ecn.h> #include <net/ip_tunnels.h> #include <net/dst_metadata.h> @@ -219,6 +220,7 @@ struct sw_flow { */ struct sw_flow_key key; struct sw_flow_id id; + struct cpumask cpu_used_mask; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each CPU. First one diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index ea7a8073fa02..80ea2a71852e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void) RCU_INIT_POINTER(flow->stats[0], stats); + cpumask_set_cpu(0, &flow->cpu_used_mask); + return flow; err: kmem_cache_free(flow_cache, flow); @@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow) if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, (struct flow_stats __force *)flow->stats[cpu]); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 008a45ca3112..f31cb71172e0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -#define PGV_FROM_VMALLOC 1 - #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 45b3af3080d8..da754fc926e7 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -300,15 +300,15 @@ out: int __init phonet_netlink_register(void) { int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, - NULL, NULL); + NULL, 0); if (err) return err; /* Further __rtnl_register() cannot fail */ - __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL); - __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL); - __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL); - __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL); - __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL); + __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0); + __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0); + __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0); + __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0); + __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0); return 0; } diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 5586609afa27..c2f5c13550c0 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1081,7 +1081,7 @@ static int __init qrtr_proto_init(void) return rc; } - rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL); + rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0); return 0; } diff --git a/net/rds/connection.c b/net/rds/connection.c index 50a3789ac23e..7ee2d5d68b78 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -151,6 +151,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_transport *loop_trans; unsigned long flags; int ret, i; + int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rcu_read_lock(); conn = rds_conn_lookup(net, head, laddr, faddr, trans); @@ -172,6 +173,12 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn = ERR_PTR(-ENOMEM); goto out; } + conn->c_path = kcalloc(npaths, sizeof(struct rds_conn_path), gfp); + if (!conn->c_path) { + kmem_cache_free(rds_conn_slab, conn); + conn = ERR_PTR(-ENOMEM); + goto out; + } INIT_HLIST_NODE(&conn->c_hash_node); conn->c_laddr = laddr; @@ -181,6 +188,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, ret = rds_cong_get_maps(conn); if (ret) { + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(ret); goto out; @@ -207,13 +215,14 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn->c_trans = trans; init_waitqueue_head(&conn->c_hs_waitq); - for (i = 0; i < RDS_MPATH_WORKERS; i++) { + for (i = 0; i < npaths; i++) { __rds_conn_path_init(conn, &conn->c_path[i], is_outgoing); conn->c_path[i].cp_index = i; } ret = trans->conn_alloc(conn, gfp); if (ret) { + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(ret); goto out; @@ -236,6 +245,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, /* Creating passive conn */ if (parent->c_passive) { trans->conn_free(conn->c_path[0].cp_transport_data); + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = parent->c_passive; } else { @@ -252,7 +262,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_conn_path *cp; int i; - for (i = 0; i < RDS_MPATH_WORKERS; i++) { + for (i = 0; i < npaths; i++) { cp = &conn->c_path[i]; /* The ->conn_alloc invocation may have * allocated resource for all paths, so all @@ -261,6 +271,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, if (cp->cp_transport_data) trans->conn_free(cp->cp_transport_data); } + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = found; } else { @@ -374,13 +385,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) if (!cp->cp_transport_data) return; - rds_conn_path_drop(cp); - flush_work(&cp->cp_down_w); - /* make sure lingering queued work won't try to ref the conn */ cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); + rds_conn_path_drop(cp, true); + flush_work(&cp->cp_down_w); + /* tear down queued messages */ list_for_each_entry_safe(rm, rtmp, &cp->cp_send_queue, @@ -407,6 +418,7 @@ void rds_conn_destroy(struct rds_connection *conn) unsigned long flags; int i; struct rds_conn_path *cp; + int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rdsdebug("freeing conn %p for %pI4 -> " "%pI4\n", conn, &conn->c_laddr, @@ -420,7 +432,7 @@ void rds_conn_destroy(struct rds_connection *conn) synchronize_rcu(); /* shut the connection down */ - for (i = 0; i < RDS_MPATH_WORKERS; i++) { + for (i = 0; i < npaths; i++) { cp = &conn->c_path[i]; rds_conn_path_destroy(cp); BUG_ON(!list_empty(&cp->cp_retrans)); @@ -434,6 +446,7 @@ void rds_conn_destroy(struct rds_connection *conn) rds_cong_remove_conn(conn); put_net(conn->c_net); + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); spin_lock_irqsave(&rds_conn_lock, flags); @@ -464,8 +477,12 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, i++, head++) { hlist_for_each_entry_rcu(conn, head, c_hash_node) { struct rds_conn_path *cp; + int npaths; + + npaths = (conn->c_trans->t_mp_capable ? + RDS_MPATH_WORKERS : 1); - for (j = 0; j < RDS_MPATH_WORKERS; j++) { + for (j = 0; j < npaths; j++) { cp = &conn->c_path[j]; if (want_send) list = &cp->cp_send_queue; @@ -486,8 +503,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, } spin_unlock_irqrestore(&cp->cp_lock, flags); - if (!conn->c_trans->t_mp_capable) - break; } } } @@ -571,15 +586,16 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, i++, head++) { hlist_for_each_entry_rcu(conn, head, c_hash_node) { struct rds_conn_path *cp; + int npaths; - for (j = 0; j < RDS_MPATH_WORKERS; j++) { + npaths = (conn->c_trans->t_mp_capable ? + RDS_MPATH_WORKERS : 1); + for (j = 0; j < npaths; j++) { cp = &conn->c_path[j]; /* XXX no cp_lock usage.. */ if (!visitor(cp, buffer)) continue; - if (!conn->c_trans->t_mp_capable) - break; } /* We copy as much as we can fit in the buffer, @@ -664,9 +680,13 @@ void rds_conn_exit(void) /* * Force a disconnect */ -void rds_conn_path_drop(struct rds_conn_path *cp) +void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) { atomic_set(&cp->cp_state, RDS_CONN_ERROR); + + if (!destroy && cp->cp_conn->c_destroy_in_prog) + return; + queue_work(rds_wq, &cp->cp_down_w); } EXPORT_SYMBOL_GPL(rds_conn_path_drop); @@ -674,7 +694,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop); void rds_conn_drop(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); - rds_conn_path_drop(&conn->c_path[0]); + rds_conn_path_drop(&conn->c_path[0], false); } EXPORT_SYMBOL_GPL(rds_conn_drop); @@ -706,5 +726,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) vprintk(fmt, ap); va_end(ap); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } diff --git a/net/rds/rds.h b/net/rds/rds.h index 516bcc89b46f..2e0315b159cb 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -154,7 +154,7 @@ struct rds_connection { struct list_head c_map_item; unsigned long c_map_queued; - struct rds_conn_path c_path[RDS_MPATH_WORKERS]; + struct rds_conn_path *c_path; wait_queue_head_t c_hs_waitq; /* handshake waitq */ u32 c_my_gen_num; @@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net, void rds_conn_shutdown(struct rds_conn_path *cpath); void rds_conn_destroy(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn); -void rds_conn_path_drop(struct rds_conn_path *cpath); +void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); void rds_for_each_conn_info(struct socket *sock, unsigned int len, diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 431404dbdad1..6b7ee71f40c6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net) continue; /* reconnect with new parameters */ - rds_conn_path_drop(tc->t_cpath); + rds_conn_path_drop(tc->t_cpath, false); } spin_unlock_irq(&rds_tcp_conn_lock); } diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index cbe08a1fa4c7..46f74dad0e16 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk) if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) && rds_conn_path_transition(cp, RDS_CONN_CONNECTING, RDS_CONN_ERROR)) { - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } else { rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } break; case TCP_CLOSE_WAIT: case TCP_CLOSE: - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); default: break; } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 0d8616aa5bad..dc860d1bb608 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -157,7 +157,7 @@ out: "returned %d, " "disconnecting and reconnecting\n", &conn->c_faddr, cp->cp_index, ret); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } } } diff --git a/net/rds/threads.c b/net/rds/threads.c index 2852bc1d37d4..f121daa402c8 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) "current state is %d\n", __func__, atomic_read(&cp->cp_state)); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); return; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 69b97339ff9d..8c0db9b3e4ab 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -15,7 +15,7 @@ #include <net/netns/generic.h> #include <net/sock.h> #include <net/af_rxrpc.h> -#include <rxrpc/packet.h> +#include "protocol.h" #if 0 #define CHECK_SLAB_OKAY(X) \ diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h new file mode 100644 index 000000000000..4bddcf3face3 --- /dev/null +++ b/net/rxrpc/protocol.h @@ -0,0 +1,190 @@ +/* packet.h: Rx packet layout and definitions + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_RXRPC_PACKET_H +#define _LINUX_RXRPC_PACKET_H + +typedef u32 rxrpc_seq_t; /* Rx message sequence number */ +typedef u32 rxrpc_serial_t; /* Rx message serial number */ +typedef __be32 rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */ +typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */ + +/*****************************************************************************/ +/* + * on-the-wire Rx packet header + * - all multibyte fields should be in network byte order + */ +struct rxrpc_wire_header { + __be32 epoch; /* client boot timestamp */ +#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */ + + __be32 cid; /* connection and channel ID */ +#define RXRPC_MAXCALLS 4 /* max active calls per conn */ +#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */ +#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */ +#define RXRPC_CIDSHIFT ilog2(RXRPC_MAXCALLS) /* shift for connection ID */ +#define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */ + + __be32 callNumber; /* call ID (0 for connection-level packets) */ + __be32 seq; /* sequence number of pkt in call stream */ + __be32 serial; /* serial number of pkt sent to network */ + + uint8_t type; /* packet type */ +#define RXRPC_PACKET_TYPE_DATA 1 /* data */ +#define RXRPC_PACKET_TYPE_ACK 2 /* ACK */ +#define RXRPC_PACKET_TYPE_BUSY 3 /* call reject */ +#define RXRPC_PACKET_TYPE_ABORT 4 /* call/connection abort */ +#define RXRPC_PACKET_TYPE_ACKALL 5 /* ACK all outstanding packets on call */ +#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */ +#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection secutity response (CLNT->SRVR) */ +#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */ +#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ +#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */ + + uint8_t flags; /* packet flags */ +#define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ +#define RXRPC_REQUEST_ACK 0x02 /* request an unconditional ACK of this packet */ +#define RXRPC_LAST_PACKET 0x04 /* the last packet from this side for this call */ +#define RXRPC_MORE_PACKETS 0x08 /* more packets to come */ +#define RXRPC_JUMBO_PACKET 0x20 /* [DATA] this is a jumbo packet */ +#define RXRPC_SLOW_START_OK 0x20 /* [ACK] slow start supported */ + + uint8_t userStatus; /* app-layer defined status */ +#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */ + + uint8_t securityIndex; /* security protocol ID */ + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; + __be16 serviceId; /* service ID */ + +} __packed; + +#define RXRPC_SUPPORTED_PACKET_TYPES ( \ + (1 << RXRPC_PACKET_TYPE_DATA) | \ + (1 << RXRPC_PACKET_TYPE_ACK) | \ + (1 << RXRPC_PACKET_TYPE_BUSY) | \ + (1 << RXRPC_PACKET_TYPE_ABORT) | \ + (1 << RXRPC_PACKET_TYPE_ACKALL) | \ + (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \ + (1 << RXRPC_PACKET_TYPE_RESPONSE) | \ + /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \ + (1 << RXRPC_PACKET_TYPE_VERSION)) + +/*****************************************************************************/ +/* + * jumbo packet secondary header + * - can be mapped to read header by: + * - new_serial = serial + 1 + * - new_seq = seq + 1 + * - new_flags = j_flags + * - new__rsvd = j__rsvd + * - duplicating all other fields + */ +struct rxrpc_jumbo_header { + uint8_t flags; /* packet flags (as per rxrpc_header) */ + uint8_t pad; + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; +}; + +#define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ +#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) + +/*****************************************************************************/ +/* + * on-the-wire Rx ACK packet data payload + * - all multibyte fields should be in network byte order + */ +struct rxrpc_ackpacket { + __be16 bufferSpace; /* number of packet buffers available */ + __be16 maxSkew; /* diff between serno being ACK'd and highest serial no + * received */ + __be32 firstPacket; /* sequence no of first ACK'd packet in attached list */ + __be32 previousPacket; /* sequence no of previous packet received */ + __be32 serial; /* serial no of packet that prompted this ACK */ + + uint8_t reason; /* reason for ACK */ +#define RXRPC_ACK_REQUESTED 1 /* ACK was requested on packet */ +#define RXRPC_ACK_DUPLICATE 2 /* duplicate packet received */ +#define RXRPC_ACK_OUT_OF_SEQUENCE 3 /* out of sequence packet received */ +#define RXRPC_ACK_EXCEEDS_WINDOW 4 /* packet received beyond end of ACK window */ +#define RXRPC_ACK_NOSPACE 5 /* packet discarded due to lack of buffer space */ +#define RXRPC_ACK_PING 6 /* keep alive ACK */ +#define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ +#define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ +#define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ +#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ + + uint8_t nAcks; /* number of ACKs */ +#define RXRPC_MAXACKS 255 + + uint8_t acks[0]; /* list of ACK/NAKs */ +#define RXRPC_ACK_TYPE_NACK 0 +#define RXRPC_ACK_TYPE_ACK 1 + +} __packed; + +/* Some ACKs refer to specific packets and some are general and can be updated. */ +#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ + (1 << RXRPC_ACK_PING_RESPONSE) | \ + (1 << RXRPC_ACK_DELAY) | \ + (1 << RXRPC_ACK_IDLE)) + + +/* + * ACK packets can have a further piece of information tagged on the end + */ +struct rxrpc_ackinfo { + __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */ + __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */ + __be32 rwind; /* Rx window size (packets) [AFS 3.4] */ + __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */ +}; + +/*****************************************************************************/ +/* + * Kerberos security type-2 challenge packet + */ +struct rxkad_challenge { + __be32 version; /* version of this challenge type */ + __be32 nonce; /* encrypted random number */ + __be32 min_level; /* minimum security level */ + __be32 __padding; /* padding to 8-byte boundary */ +} __packed; + +/*****************************************************************************/ +/* + * Kerberos security type-2 response packet + */ +struct rxkad_response { + __be32 version; /* version of this response type */ + __be32 __pad; + + /* encrypted bit of the response */ + struct { + __be32 epoch; /* current epoch */ + __be32 cid; /* parent connection ID */ + __be32 checksum; /* checksum */ + __be32 securityIndex; /* security type */ + __be32 call_id[4]; /* encrypted call IDs */ + __be32 inc_nonce; /* challenge nonce + 1 */ + __be32 level; /* desired level */ + } encrypted; + + __be32 kvno; /* Kerberos key version number */ + __be32 ticket_len; /* Kerberos ticket length */ +} __packed; + +#endif /* _LINUX_RXRPC_PACKET_H */ diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f2e9ed34a963..02fcb0c78a28 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -110,6 +110,8 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, struct netlink_callback *cb) { int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; + u32 act_flags = cb->args[2]; + unsigned long jiffy_since = cb->args[3]; struct nlattr *nest; spin_lock_bh(&hinfo->lock); @@ -127,6 +129,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, if (index < s_i) continue; + if (jiffy_since && + time_after(jiffy_since, + (unsigned long)p->tcfa_tm.lastuse)) + continue; + nest = nla_nest_start(skb, n_i); if (nest == NULL) goto nla_put_failure; @@ -138,14 +145,20 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, } nla_nest_end(skb, nest); n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) + if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && + n_i >= TCA_ACT_MAX_PRIO) goto done; } } done: + if (index >= 0) + cb->args[0] = index + 1; + spin_unlock_bh(&hinfo->lock); - if (n_i) - cb->args[0] += n_i; + if (n_i) { + if (act_flags & TCA_FLAG_LARGE_DUMP_ON) + cb->args[1] = n_i; + } return n_i; nla_put_failure: @@ -460,9 +473,10 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res) { - int ret = -1, i; u32 jmp_prgcnt = 0; u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */ + int i; + int ret = TC_ACT_OK; if (skb_skip_tc_classify(skb)) return TC_ACT_OK; @@ -1068,11 +1082,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, return tcf_add_notify(net, n, &actions, portid); } +static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; +static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { + [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32, + .validation_data = &tcaa_root_flags_allowed }, + [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, +}; + static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct nlattr *tca[TCA_ACT_MAX + 1]; + struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; @@ -1080,7 +1101,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL, + ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; @@ -1121,16 +1142,12 @@ replay: return ret; } -static struct nlattr *find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(struct nlattr **nla) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct nlattr *nla[TCAA_MAX + 1]; struct nlattr *kind; - if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, - NULL, NULL) < 0) - return NULL; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; @@ -1157,8 +1174,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action_ops *a_o; int ret = 0; struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); - struct nlattr *kind = find_dump_kind(cb->nlh); + struct nlattr *tb[TCA_ROOT_MAX + 1]; + struct nlattr *count_attr = NULL; + unsigned long jiffy_since = 0; + struct nlattr *kind = NULL; + struct nla_bitfield32 bf; + u32 msecs_since = 0; + u32 act_count = 0; + + ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, + tcaa_policy, NULL); + if (ret < 0) + return ret; + kind = find_dump_kind(tb); if (kind == NULL) { pr_info("tc_dump_action: action bad kind\n"); return 0; @@ -1168,14 +1197,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (a_o == NULL) return 0; + cb->args[2] = 0; + if (tb[TCA_ROOT_FLAGS]) { + bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]); + cb->args[2] = bf.value; + } + + if (tb[TCA_ROOT_TIME_DELTA]) { + msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]); + } + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; + + if (msecs_since) + jiffy_since = jiffies - msecs_to_jiffies(msecs_since); + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; + cb->args[3] = jiffy_since; + count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32)); + if (!count_attr) + goto out_module_put; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) @@ -1188,6 +1235,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (ret > 0) { nla_nest_end(skb, nest); ret = skb->len; + act_count = cb->args[1]; + memcpy(nla_data(count_attr), &act_count, sizeof(u32)); + cb->args[1] = 0; } else nlmsg_trim(skb, b); @@ -1205,10 +1255,10 @@ out_module_put: static int __init tc_action_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, - NULL); + 0); return 0; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3317a2f579da..67afc12df88b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -231,9 +231,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, const struct iphdr *iph; u16 ul; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - return 1; - /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -287,9 +284,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, const struct ipv6hdr *ip6h; u16 ul; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - return 1; - /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 39da0c5801c9..ebeeb87e6d44 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -100,21 +100,6 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) } EXPORT_SYMBOL(unregister_tcf_proto_ops); -static int tfilter_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event, bool unicast); - -static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, - struct tcf_chain *chain, int event) -{ - struct tcf_proto *tp; - - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) - tfilter_notify(net, oskb, n, tp, 0, event, false); -} - /* Select new prio value from the range, managed by kernel. */ static inline u32 tcf_auto_prio(struct tcf_proto *tp) @@ -407,6 +392,109 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, return tp; } +static int tcf_fill_node(struct net *net, struct sk_buff *skb, + struct tcf_proto *tp, void *fh, u32 portid, + u32 seq, u16 flags, int event) +{ + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb_tail_pointer(skb); + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; + tcm->tcm_parent = tp->classid; + tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); + if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) + goto nla_put_failure; + if (!fh) { + tcm->tcm_handle = 0; + } else { + if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) + goto nla_put_failure; + } + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +out_nlmsg_trim: +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + void *fh, int event, bool unicast) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, event) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + void *fh, bool unicast, bool *last) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + int err; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + err = tp->ops->delete(tp, fh, last); + if (err) { + kfree_skb(skb); + return err; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, + struct tcf_chain *chain, int event) +{ + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) + tfilter_notify(net, oskb, n, tp, 0, event, false); +} + /* Add/change/delete/get a filter node */ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -428,7 +516,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct tcf_proto *tp; const struct Qdisc_class_ops *cops; unsigned long cl; - unsigned long fh; + void *fh; int err; int tp_created; @@ -567,7 +655,7 @@ replay: fh = tp->ops->get(tp, t->tcm_handle); - if (fh == 0) { + if (!fh) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { tcf_chain_tp_remove(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, fh, @@ -595,11 +683,10 @@ replay: } break; case RTM_DELTFILTER: - err = tp->ops->delete(tp, fh, &last); + err = tfilter_del_notify(net, skb, n, tp, fh, false, + &last); if (err) goto errout; - tfilter_notify(net, skb, n, tp, t->tcm_handle, - RTM_DELTFILTER, false); if (last) { tcf_chain_tp_remove(chain, &chain_info, tp); tcf_proto_destroy(tp); @@ -637,75 +724,13 @@ errout: return err; } -static int tcf_fill_node(struct net *net, struct sk_buff *skb, - struct tcf_proto *tp, unsigned long fh, u32 portid, - u32 seq, u16 flags, int event) -{ - struct tcmsg *tcm; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); - if (!nlh) - goto out_nlmsg_trim; - tcm = nlmsg_data(nlh); - tcm->tcm_family = AF_UNSPEC; - tcm->tcm__pad1 = 0; - tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; - tcm->tcm_parent = tp->classid; - tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); - if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) - goto nla_put_failure; - if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) - goto nla_put_failure; - tcm->tcm_handle = fh; - if (RTM_DELTFILTER != event) { - tcm->tcm_handle = 0; - if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) - goto nla_put_failure; - } - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -out_nlmsg_trim: -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int tfilter_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event, bool unicast) -{ - struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, - n->nlmsg_flags, event) <= 0) { - kfree_skb(skb); - return -EINVAL; - } - - if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); - - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); -} - struct tcf_dump_args { struct tcf_walker w; struct sk_buff *skb; struct netlink_callback *cb; }; -static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, - struct tcf_walker *arg) +static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_dump_args *a = (void *)arg; struct net *net = sock_net(a->skb->sk); @@ -883,18 +908,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } EXPORT_SYMBOL(tcf_exts_validate); -void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, - struct tcf_exts *src) +void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT struct tcf_exts old = *dst; - tcf_tree_lock(tp); - dst->nr_actions = src->nr_actions; - dst->actions = src->actions; - dst->type = src->type; - tcf_tree_unlock(tp); - + *dst = *src; tcf_exts_destroy(&old); #endif } @@ -915,7 +934,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; - if (exts->action && exts->nr_actions) { + if (exts->action && tcf_exts_has_actions(exts)) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering @@ -972,7 +991,7 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, const struct tc_action *a; LIST_HEAD(actions); - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return -EINVAL; tcf_exts_to_list(exts, &actions); @@ -991,10 +1010,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev); static int __init tc_filter_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, - tc_dump_tfilter, NULL); + tc_dump_tfilter, 0); return 0; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index c4fd63a068f9..73cc7f167a38 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -56,20 +56,18 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static unsigned long basic_get(struct tcf_proto *tp, u32 handle) +static void *basic_get(struct tcf_proto *tp, u32 handle) { - unsigned long l = 0UL; struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { - l = (unsigned long) f; - break; + return f; } } - return l; + return NULL; } static int basic_init(struct tcf_proto *tp) @@ -106,10 +104,10 @@ static void basic_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) { struct basic_head *head = rtnl_dereference(tp->root); - struct basic_filter *f = (struct basic_filter *) arg; + struct basic_filter *f = arg; list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); @@ -129,38 +127,27 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr *est, bool ovr) { int err; - struct tcf_exts e; - struct tcf_ematch_tree t; - err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t); + err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches); if (err < 0) - goto errout; + return err; if (tb[TCA_BASIC_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]); tcf_bind_filter(tp, &f->res, base); } - tcf_exts_change(tp, &f->exts, &e); - tcf_em_tree_change(tp, &f->ematches, &t); f->tp = tp; - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -213,7 +200,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - *arg = (unsigned long)fnew; + *arg = fnew; if (fold) { list_replace_rcu(&fold->link, &fnew->link); @@ -239,7 +226,7 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } @@ -248,10 +235,10 @@ skip: } } -static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct basic_filter *f = (struct basic_filter *) fh; + struct basic_filter *f = fh; struct nlattr *nest; if (f == NULL) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index f57bd531ba98..db17b68df94e 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -147,24 +147,18 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd) { struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_bpf_offload bpf_offload = {}; - struct tc_to_netdev offload; + struct tc_cls_bpf_offload cls_bpf = {}; int err; - offload.type = TC_SETUP_CLSBPF; - offload.cls_bpf = &bpf_offload; - - bpf_offload.command = cmd; - bpf_offload.exts = &prog->exts; - bpf_offload.prog = prog->filter; - bpf_offload.name = prog->bpf_name; - bpf_offload.exts_integrated = prog->exts_integrated; - bpf_offload.gen_flags = prog->gen_flags; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, - tp->protocol, &offload); + tc_cls_common_offload_init(&cls_bpf.common, tp); + cls_bpf.command = cmd; + cls_bpf.exts = &prog->exts; + cls_bpf.prog = prog->filter; + cls_bpf.name = prog->bpf_name; + cls_bpf.exts_integrated = prog->exts_integrated; + cls_bpf.gen_flags = prog->gen_flags; + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &cls_bpf); if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; @@ -276,11 +270,11 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); } -static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) { struct cls_bpf_head *head = rtnl_dereference(tp->root); - __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg); + __cls_bpf_delete(tp, arg); *last = list_empty(&head->plist); return 0; } @@ -296,20 +290,17 @@ static void cls_bpf_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) +static void *cls_bpf_get(struct tcf_proto *tp, u32 handle) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; - unsigned long ret = 0UL; list_for_each_entry(prog, &head->plist, link) { - if (prog->handle == handle) { - ret = (unsigned long) prog; - break; - } + if (prog->handle == handle) + return prog; } - return ret; + return NULL; } static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) @@ -382,13 +373,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, return 0; } -static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, - struct cls_bpf_prog *prog, - unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) +static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, + struct cls_bpf_prog *prog, unsigned long base, + struct nlattr **tb, struct nlattr *est, bool ovr) { bool is_bpf, is_ebpf, have_exts = false; - struct tcf_exts exts; u32 gen_flags = 0; int ret; @@ -397,30 +386,23 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr); if (ret < 0) return ret; - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); - if (ret < 0) - goto errout; if (tb[TCA_BPF_FLAGS]) { u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); - if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { - ret = -EINVAL; - goto errout; - } + if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) + return -EINVAL; have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } if (tb[TCA_BPF_FLAGS_GEN]) { gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || - !tc_flags_valid(gen_flags)) { - ret = -EINVAL; - goto errout; - } + !tc_flags_valid(gen_flags)) + return -EINVAL; } prog->exts_integrated = have_exts; @@ -429,19 +411,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); if (ret < 0) - goto errout; + return ret; if (tb[TCA_BPF_CLASSID]) { prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); tcf_bind_filter(tp, &prog->res, base); } - tcf_exts_change(tp, &prog->exts, &exts); return 0; - -errout: - tcf_exts_destroy(&exts); - return ret; } static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, @@ -468,10 +445,10 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_bpf_head *head = rtnl_dereference(tp->root); - struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg; + struct cls_bpf_prog *oldprog = *arg; struct nlattr *tb[TCA_BPF_MAX + 1]; struct cls_bpf_prog *prog; int ret; @@ -508,8 +485,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], - ovr); + ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); if (ret < 0) goto errout; @@ -530,7 +506,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, list_add_rcu(&prog->link, &head->plist); } - *arg = (unsigned long) prog; + *arg = prog; return 0; errout: @@ -578,10 +554,10 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, return 0; } -static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *tm) { - struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; + struct cls_bpf_prog *prog = fh; struct nlattr *nest; u32 bpf_flags = 0; int ret; @@ -639,7 +615,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry(prog, &head->plist, link) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) prog, arg) < 0) { + if (arg->fn(tp, prog, arg) < 0) { arg->stop = 1; break; } diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 12ce547eea04..d48452f87975 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -43,9 +43,9 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, return tcf_exts_exec(skb, &head->exts, res); } -static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) +static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle) { - return 0UL; + return NULL; } static int cls_cgroup_init(struct tcf_proto *tp) @@ -71,13 +71,11 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct cls_cgroup_head *new; - struct tcf_ematch_tree t; - struct tcf_exts e; int err; if (!tca[TCA_OPTIONS]) @@ -103,23 +101,13 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr); if (err < 0) goto errout; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) { - tcf_exts_destroy(&e); - goto errout; - } - err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); - if (err < 0) { - tcf_exts_destroy(&e); + err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches); + if (err < 0) goto errout; - } - - tcf_exts_change(tp, &new->exts, &e); - tcf_em_tree_change(tp, &new->ematches, &t); rcu_assign_pointer(tp->root, new); if (head) @@ -140,7 +128,7 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) call_rcu(&head->rcu, cls_cgroup_destroy_rcu); } -static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) { return -EOPNOTSUPP; } @@ -152,7 +140,7 @@ static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) head, arg) < 0) { + if (arg->fn(tp, head, arg) < 0) { arg->stop = 1; return; } @@ -160,7 +148,7 @@ skip: arg->count++; } -static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 3065752b9cda..2a3a60ec5b86 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -382,14 +382,12 @@ static void flow_destroy_filter(struct rcu_head *head) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FLOW_MAX + 1]; - struct tcf_exts e; - struct tcf_ematch_tree t; unsigned int nkeys = 0; unsigned int perturb_period = 0; u32 baseclass = 0; @@ -425,31 +423,27 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); - if (err < 0) - goto err1; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) - goto err1; + fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); + if (!fnew) + return -ENOBUFS; - err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); + err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches); if (err < 0) goto err1; - err = -ENOBUFS; - fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); - if (!fnew) + err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) goto err2; - err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr); if (err < 0) - goto err3; + goto err2; - fold = (struct flow_filter *)*arg; + fold = *arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) - goto err3; + goto err2; /* Copy fold into fnew */ fnew->tp = fold->tp; @@ -469,31 +463,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err3; + goto err2; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err3; + goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) - goto err3; + goto err2; if (!tb[TCA_FLOW_KEYS]) - goto err3; + goto err2; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err3; + goto err2; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err3; + goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } @@ -511,9 +505,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, (unsigned long)fnew); - tcf_exts_change(tp, &fnew->exts, &e); - tcf_em_tree_change(tp, &fnew->ematches, &t); - netif_keep_dst(qdisc_dev(tp->q)); if (tb[TCA_FLOW_KEYS]) { @@ -541,31 +532,29 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (perturb_period) mod_timer(&fnew->perturb_timer, jiffies + perturb_period); - if (*arg == 0) + if (!*arg) list_add_tail_rcu(&fnew->list, &head->filters); else list_replace_rcu(&fold->list, &fnew->list); - *arg = (unsigned long)fnew; + *arg = fnew; if (fold) call_rcu(&fold->rcu, flow_destroy_filter); return 0; -err3: - tcf_exts_destroy(&fnew->exts); err2: - tcf_em_tree_destroy(&t); - kfree(fnew); + tcf_exts_destroy(&fnew->exts); + tcf_em_tree_destroy(&fnew->ematches); err1: - tcf_exts_destroy(&e); + kfree(fnew); return err; } -static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) { struct flow_head *head = rtnl_dereference(tp->root); - struct flow_filter *f = (struct flow_filter *)arg; + struct flow_filter *f = arg; list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); @@ -597,21 +586,21 @@ static void flow_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static unsigned long flow_get(struct tcf_proto *tp, u32 handle) +static void *flow_get(struct tcf_proto *tp, u32 handle) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; list_for_each_entry(f, &head->filters, list) if (f->handle == handle) - return (unsigned long)f; - return 0; + return f; + return NULL; } -static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct flow_filter *f = (struct flow_filter *)fh; + struct flow_filter *f = fh; struct nlattr *nest; if (f == NULL) @@ -677,7 +666,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry(f, &head->filters, list) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7832eb93379b..d2551a03c542 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -88,7 +88,6 @@ struct cls_fl_filter { u32 handle; u32 flags; struct rcu_head rcu; - struct tc_to_netdev tc; struct net_device *hw_dev; }; @@ -225,22 +224,17 @@ static void fl_destroy_filter(struct rcu_head *head) static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) { - struct tc_cls_flower_offload offload = {0}; + struct tc_cls_flower_offload cls_flower = {}; struct net_device *dev = f->hw_dev; - struct tc_to_netdev *tc = &f->tc; if (!tc_can_offload(dev, tp)) return; - offload.command = TC_CLSFLOWER_DESTROY; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_DESTROY; + cls_flower.cookie = (unsigned long) f; - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; - - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, - tp->protocol, tc); + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); } static int fl_hw_replace_filter(struct tcf_proto *tp, @@ -249,8 +243,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, struct cls_fl_filter *f) { struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_flower_offload offload = {0}; - struct tc_to_netdev *tc = &f->tc; + struct tc_cls_flower_offload cls_flower = {}; int err; if (!tc_can_offload(dev, tp)) { @@ -260,24 +253,21 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, return tc_skip_sw(f->flags) ? -EINVAL : 0; } dev = f->hw_dev; - tc->egress_dev = true; + cls_flower.egress_dev = true; } else { f->hw_dev = dev; } - offload.command = TC_CLSFLOWER_REPLACE; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; - offload.dissector = dissector; - offload.mask = mask; - offload.key = &f->mkey; - offload.exts = &f->exts; - - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_REPLACE; + cls_flower.cookie = (unsigned long) f; + cls_flower.dissector = dissector; + cls_flower.mask = mask; + cls_flower.key = &f->mkey; + cls_flower.exts = &f->exts; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, tc); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, + &cls_flower); if (!err) f->flags |= TCA_CLS_FLAGS_IN_HW; @@ -288,23 +278,19 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) { - struct tc_cls_flower_offload offload = {0}; + struct tc_cls_flower_offload cls_flower = {}; struct net_device *dev = f->hw_dev; - struct tc_to_netdev *tc = &f->tc; if (!tc_can_offload(dev, tp)) return; - offload.command = TC_CLSFLOWER_STATS; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; - offload.exts = &f->exts; - - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_STATS; + cls_flower.cookie = (unsigned long) f; + cls_flower.exts = &f->exts; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, tc); + dev->netdev_ops->ndo_setup_tc(dev, TC_CLSFLOWER_STATS, + &cls_flower); } static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) @@ -346,15 +332,15 @@ static void fl_destroy(struct tcf_proto *tp) call_rcu(&head->rcu, fl_destroy_rcu); } -static unsigned long fl_get(struct tcf_proto *tp, u32 handle) +static void *fl_get(struct tcf_proto *tp, u32 handle) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; list_for_each_entry(f, &head->filters, list) if (f->handle == handle) - return (unsigned long) f; - return 0; + return f; + return NULL; } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { @@ -852,15 +838,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); @@ -869,17 +851,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, err = fl_set_key(net, tb, &f->key, &mask->key); if (err) - goto errout; + return err; fl_mask_update_range(mask); fl_set_masked_key(&f->mkey, &f->key, mask); - tcf_exts_change(tp, &f->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static u32 fl_grab_new_handle(struct tcf_proto *tp, @@ -906,10 +883,10 @@ static u32 fl_grab_new_handle(struct tcf_proto *tp, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg; + struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct nlattr **tb; struct fl_flow_mask mask = {}; @@ -1000,7 +977,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, fl_hw_destroy_filter(tp, fold); } - *arg = (unsigned long) fnew; + *arg = fnew; if (fold) { list_replace_rcu(&fold->list, &fnew->list); @@ -1021,10 +998,10 @@ errout_tb: return err; } -static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int fl_delete(struct tcf_proto *tp, void *arg, bool *last) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f = (struct cls_fl_filter *) arg; + struct cls_fl_filter *f = arg; if (!tc_skip_sw(f->flags)) rhashtable_remove_fast(&head->ht, &f->ht_node, @@ -1042,7 +1019,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry_rcu(f, &head->filters, list) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } @@ -1177,11 +1154,11 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); } -static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f = (struct cls_fl_filter *) fh; + struct cls_fl_filter *f = fh; struct nlattr *nest; struct fl_flow_key *key, *mask; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index d3885362e017..192255ec50bd 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -95,20 +95,20 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static unsigned long fw_get(struct tcf_proto *tp, u32 handle) +static void *fw_get(struct tcf_proto *tp, u32 handle) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; if (head == NULL) - return 0; + return NULL; f = rtnl_dereference(head->ht[fw_hash(handle)]); for (; f; f = rtnl_dereference(f->next)) { if (f->id == handle) - return (unsigned long)f; + return f; } - return 0; + return NULL; } static int fw_init(struct tcf_proto *tp) @@ -147,10 +147,10 @@ static void fw_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *)arg; + struct fw_filter *f = arg; struct fw_filter __rcu **fp; struct fw_filter *pfp; int ret = -EINVAL; @@ -190,22 +190,17 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { [TCA_FW_MASK] = { .type = NLA_U32 }, }; -static int -fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base, - bool ovr) +static int fw_set_parms(struct net *net, struct tcf_proto *tp, + struct fw_filter *f, struct nlattr **tb, + struct nlattr **tca, unsigned long base, bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); - struct tcf_exts e; u32 mask; int err; - err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) - goto errout; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); @@ -216,10 +211,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (tb[TCA_FW_INDEV]) { int ret; ret = tcf_change_indev(net, tb[TCA_FW_INDEV]); - if (ret < 0) { - err = ret; - goto errout; - } + if (ret < 0) + return ret; f->ifindex = ret; } #endif /* CONFIG_NET_CLS_IND */ @@ -228,25 +221,20 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) - goto errout; + return err; } else if (head->mask != 0xFFFFFFFF) - goto errout; - - tcf_exts_change(tp, &f->exts, &e); + return err; return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, struct nlattr **tca, unsigned long *arg, + u32 handle, struct nlattr **tca, void **arg, bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *) *arg; + struct fw_filter *f = *arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FW_MAX + 1]; int err; @@ -282,7 +270,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return err; } - err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr); + err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -300,7 +288,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, fw_delete_filter); - *arg = (unsigned long)fnew; + *arg = fnew; return err; } @@ -330,14 +318,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, f->id = handle; f->tp = tp; - err = fw_change_attrs(net, tp, f, tb, tca, base, ovr); + err = fw_set_parms(net, tp, f, tb, tca, base, ovr); if (err < 0) goto errout; RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]); rcu_assign_pointer(head->ht[fw_hash(handle)], f); - *arg = (unsigned long)f; + *arg = f; return 0; errout: @@ -366,7 +354,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -375,11 +363,11 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *)fh; + struct fw_filter *f = fh; struct nlattr *nest; if (f == NULL) @@ -387,7 +375,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, t->tcm_handle = f->id; - if (!f->res.classid && !tcf_exts_is_available(&f->exts)) + if (!f->res.classid && !tcf_exts_has_actions(&f->exts)) return skb->len; nest = nla_nest_start(skb, TCA_OPTIONS); diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 9dc26c32cf32..d44e26fdae84 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -54,19 +54,16 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, unsigned long cookie) { struct net_device *dev = tp->q->dev_queue->dev; - struct tc_to_netdev offload; - struct tc_cls_matchall_offload mall_offload = {0}; + struct tc_cls_matchall_offload cls_mall = {}; int err; - offload.type = TC_SETUP_MATCHALL; - offload.cls_mall = &mall_offload; - offload.cls_mall->command = TC_CLSMATCHALL_REPLACE; - offload.cls_mall->exts = &head->exts; - offload.cls_mall->cookie = cookie; + tc_cls_common_offload_init(&cls_mall.common, tp); + cls_mall.command = TC_CLSMATCHALL_REPLACE; + cls_mall.exts = &head->exts; + cls_mall.cookie = cookie; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, - tp->protocol, &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, + &cls_mall); if (!err) head->flags |= TCA_CLS_FLAGS_IN_HW; @@ -78,17 +75,13 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, unsigned long cookie) { struct net_device *dev = tp->q->dev_queue->dev; - struct tc_to_netdev offload; - struct tc_cls_matchall_offload mall_offload = {0}; + struct tc_cls_matchall_offload cls_mall = {}; - offload.type = TC_SETUP_MATCHALL; - offload.cls_mall = &mall_offload; - offload.cls_mall->command = TC_CLSMATCHALL_DESTROY; - offload.cls_mall->exts = NULL; - offload.cls_mall->cookie = cookie; + tc_cls_common_offload_init(&cls_mall.common, tp); + cls_mall.command = TC_CLSMATCHALL_DESTROY; + cls_mall.cookie = cookie; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, - tp->protocol, &offload); + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &cls_mall); } static void mall_destroy(struct tcf_proto *tp) @@ -105,9 +98,9 @@ static void mall_destroy(struct tcf_proto *tp) call_rcu(&head->rcu, mall_destroy_rcu); } -static unsigned long mall_get(struct tcf_proto *tp, u32 handle) +static void *mall_get(struct tcf_proto *tp, u32 handle) { - return 0UL; + return NULL; } static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { @@ -120,33 +113,23 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0); - if (err) - return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr); if (err < 0) - goto errout; + return err; if (tb[TCA_MATCHALL_CLASSID]) { head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); tcf_bind_filter(tp, &head->res, base); } - - tcf_exts_change(tp, &head->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct net_device *dev = tp->q->dev_queue->dev; @@ -202,7 +185,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(new->flags)) new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; - *arg = (unsigned long) head; + *arg = head; rcu_assign_pointer(tp->root, new); return 0; @@ -214,7 +197,7 @@ err_exts_init: return err; } -static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int mall_delete(struct tcf_proto *tp, void *arg, bool *last) { return -EOPNOTSUPP; } @@ -225,16 +208,16 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) head, arg) < 0) + if (arg->fn(tp, head, arg) < 0) arg->stop = 1; skip: arg->count++; } -static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct cls_mall_head *head = (struct cls_mall_head *) fh; + struct cls_mall_head *head = fh; struct nlattr *nest; if (!head) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index d63d5502ee02..3b70982394ce 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -113,7 +113,7 @@ static inline int route4_hash_wild(void) #define ROUTE4_APPLY_RESULT() \ { \ *res = f->res; \ - if (tcf_exts_is_available(&f->exts)) { \ + if (tcf_exts_has_actions(&f->exts)) { \ int r = tcf_exts_exec(skb, &f->exts, res); \ if (r < 0) { \ dont_cache = 1; \ @@ -216,7 +216,7 @@ static inline u32 from_hash(u32 id) return 16 + (id & 0xF); } -static unsigned long route4_get(struct tcf_proto *tp, u32 handle) +static void *route4_get(struct tcf_proto *tp, u32 handle) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_bucket *b; @@ -225,11 +225,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) h1 = to_hash(handle); if (h1 > 256) - return 0; + return NULL; h2 = from_hash(handle >> 16); if (h2 > 32) - return 0; + return NULL; b = rtnl_dereference(head->table[h1]); if (b) { @@ -237,9 +237,9 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) f; f = rtnl_dereference(f->next)) if (f->handle == handle) - return (unsigned long)f; + return f; } - return 0; + return NULL; } static int route4_init(struct tcf_proto *tp) @@ -294,10 +294,10 @@ static void route4_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) { struct route4_head *head = rtnl_dereference(tp->root); - struct route4_filter *f = (struct route4_filter *)arg; + struct route4_filter *f = arg; struct route4_filter __rcu **fp; struct route4_filter *nf; struct route4_bucket *b; @@ -372,37 +372,32 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_filter *fp; unsigned int h1; struct route4_bucket *b; - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = -EINVAL; if (tb[TCA_ROUTE4_TO]) { if (new && handle & 0x8000) - goto errout; + return -EINVAL; to = nla_get_u32(tb[TCA_ROUTE4_TO]); if (to > 0xFF) - goto errout; + return -EINVAL; nhandle = to; } if (tb[TCA_ROUTE4_FROM]) { if (tb[TCA_ROUTE4_IIF]) - goto errout; + return -EINVAL; id = nla_get_u32(tb[TCA_ROUTE4_FROM]); if (id > 0xFF) - goto errout; + return -EINVAL; nhandle |= id << 16; } else if (tb[TCA_ROUTE4_IIF]) { id = nla_get_u32(tb[TCA_ROUTE4_IIF]); if (id > 0x7FFF) - goto errout; + return -EINVAL; nhandle |= (id | 0x8000) << 16; } else nhandle |= 0xFFFF << 16; @@ -410,27 +405,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, if (handle && new) { nhandle |= handle & 0x7F00; if (nhandle != handle) - goto errout; + return -EINVAL; } h1 = to_hash(nhandle); b = rtnl_dereference(head->table[h1]); if (!b) { - err = -ENOBUFS; b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); if (b == NULL) - goto errout; + return -ENOBUFS; rcu_assign_pointer(head->table[h1], b); } else { unsigned int h2 = from_hash(nhandle >> 16); - err = -EEXIST; for (fp = rtnl_dereference(b->ht[h2]); fp; fp = rtnl_dereference(fp->next)) if (fp->handle == f->handle) - goto errout; + return -EEXIST; } if (tb[TCA_ROUTE4_TO]) @@ -450,17 +443,12 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &f->res, base); } - tcf_exts_change(tp, &f->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -479,7 +467,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - fold = (struct route4_filter *)*arg; + fold = *arg; if (fold && handle && fold->handle != handle) return -EINVAL; @@ -537,7 +525,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, } route4_reset_fastmap(head); - *arg = (unsigned long)f; + *arg = f; if (fold) { tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, route4_delete_filter); @@ -576,7 +564,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -587,10 +575,10 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct route4_filter *f = (struct route4_filter *)fh; + struct route4_filter *f = fh; struct nlattr *nest; u32 id; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 0d9d07798699..26203ff817f3 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -248,7 +248,7 @@ static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h) BUG_ON(1); } -static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) +static void *rsvp_get(struct tcf_proto *tp, u32 handle) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_session *s; @@ -257,17 +257,17 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) unsigned int h2 = (handle >> 8) & 0xFF; if (h2 > 16) - return 0; + return NULL; for (s = rtnl_dereference(head->ht[h1]); s; s = rtnl_dereference(s->next)) { for (f = rtnl_dereference(s->ht[h2]); f; f = rtnl_dereference(f->next)) { if (f->handle == handle) - return (unsigned long)f; + return f; } } - return 0; + return NULL; } static int rsvp_init(struct tcf_proto *tp) @@ -328,10 +328,10 @@ static void rsvp_destroy(struct tcf_proto *tp) kfree_rcu(data, rcu); } -static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last) { struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg; + struct rsvp_filter *nfp, *f = arg; struct rsvp_filter __rcu **fp; unsigned int h = f->handle; struct rsvp_session __rcu **sp; @@ -464,7 +464,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -493,7 +493,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout2; - f = (struct rsvp_filter *)*arg; + f = *arg; if (f) { /* Node exists: adjust only classid */ struct rsvp_filter *n; @@ -518,7 +518,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, tcf_bind_filter(tp, &n->res, base); } - tcf_exts_change(tp, &n->exts, &e); + tcf_exts_change(&n->exts, &e); rsvp_replace(tp, n, handle); return 0; } @@ -591,7 +591,7 @@ insert: if (f->tunnelhdr == 0) tcf_bind_filter(tp, &f->res, base); - tcf_exts_change(tp, &f->exts, &e); + tcf_exts_change(&f->exts, &e); fp = &s->ht[h2]; for (nfp = rtnl_dereference(*fp); nfp; @@ -604,7 +604,7 @@ insert: RCU_INIT_POINTER(f->next, nfp); rcu_assign_pointer(*fp, f); - *arg = (unsigned long)f; + *arg = f; return 0; } } @@ -663,7 +663,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -674,10 +674,10 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct rsvp_filter *f = (struct rsvp_filter *)fh; + struct rsvp_filter *f = fh; struct rsvp_session *s; struct nlattr *nest; struct tc_rsvp_pinfo pinfo; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 8a8a58357c39..fb281b9b2c52 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -52,7 +52,7 @@ struct tcindex_data { static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { - return tcf_exts_is_predicative(&r->exts) || r->res.classid; + return tcf_exts_has_actions(&r->exts) || r->res.classid; } static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, @@ -104,16 +104,16 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, } -static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) +static void *tcindex_get(struct tcf_proto *tp, u32 handle) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); if (p->perfect && handle >= p->alloc_hash) - return 0; + return NULL; r = tcindex_lookup(p, handle); - return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL; + return r && tcindex_filter_is_set(r) ? r : NULL; } static int tcindex_init(struct tcf_proto *tp) @@ -150,14 +150,14 @@ static void tcindex_destroy_fexts(struct rcu_head *head) kfree(f); } -static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) { struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; + struct tcindex_filter_result *r = arg; struct tcindex_filter __rcu **walk; struct tcindex_filter *f = NULL; - pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p); + pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p); if (p->perfect) { if (!r->res.class) return -ENOENT; @@ -192,8 +192,7 @@ found: } static int tcindex_destroy_element(struct tcf_proto *tp, - unsigned long arg, - struct tcf_walker *walker) + void *arg, struct tcf_walker *walker) { bool last; @@ -419,9 +418,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (old_r) - tcf_exts_change(tp, &r->exts, &e); + tcf_exts_change(&r->exts, &e); else - tcf_exts_change(tp, &cr.exts, &e); + tcf_exts_change(&cr.exts, &e); if (old_r && old_r != r) { err = tcindex_filter_result_init(old_r); @@ -439,7 +438,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; - tcf_exts_change(tp, &f->result.exts, &r->exts); + tcf_exts_change(&f->result.exts, &r->exts); fp = cp->h + (handle % cp->hash); for (nfp = rtnl_dereference(*fp); @@ -471,17 +470,17 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; + struct tcindex_filter_result *r = *arg; int err; pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," - "p %p,r %p,*arg 0x%lx\n", - tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L); + "p %p,r %p,*arg %p\n", + tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL); if (!opt) return 0; @@ -506,9 +505,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) if (!p->perfect[i].res.class) continue; if (walker->count >= walker->skip) { - if (walker->fn(tp, - (unsigned long) (p->perfect+i), walker) - < 0) { + if (walker->fn(tp, p->perfect + i, walker) < 0) { walker->stop = 1; return; } @@ -522,8 +519,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) for (f = rtnl_dereference(p->h[i]); f; f = next) { next = rtnl_dereference(f->next); if (walker->count >= walker->skip) { - if (walker->fn(tp, (unsigned long) &f->result, - walker) < 0) { + if (walker->fn(tp, &f->result, walker) < 0) { walker->stop = 1; return; } @@ -548,14 +544,14 @@ static void tcindex_destroy(struct tcf_proto *tp) } -static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; + struct tcindex_filter_result *r = fh; struct nlattr *nest; - pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n", + pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n", tp, fh, skb, t, p, r); pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 2d01195153e6..5a3f78181526 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -289,7 +289,7 @@ out: } -static unsigned long u32_get(struct tcf_proto *tp, u32 handle) +static void *u32_get(struct tcf_proto *tp, u32 handle) { struct tc_u_hnode *ht; struct tc_u_common *tp_c = tp->data; @@ -300,12 +300,12 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle) ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); if (!ht) - return 0; + return NULL; if (TC_U32_KEY(handle) == 0) - return (unsigned long)ht; + return ht; - return (unsigned long)u32_lookup_key(ht, handle); + return u32_lookup_key(ht, handle); } static u32 gen_new_htid(struct tc_u_common *tp_c) @@ -431,43 +431,35 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) { struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; - - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - if (tc_should_offload(dev, tp, 0)) { - offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; - offload.cls_u32->knode.handle = handle; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); - } + struct tc_cls_u32_offload cls_u32 = {}; + + if (!tc_should_offload(dev, tp, 0)) + return; + + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_DELETE_KNODE; + cls_u32.knode.handle = handle; + + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; + struct tc_cls_u32_offload cls_u32 = {}; int err; if (!tc_should_offload(dev, tp, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - offload.cls_u32->command = TC_CLSU32_NEW_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_NEW_HNODE; + cls_u32.hnode.divisor = h->divisor; + cls_u32.hnode.handle = h->handle; + cls_u32.hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); if (tc_skip_sw(flags)) return err; @@ -477,56 +469,47 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) { struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; + struct tc_cls_u32_offload cls_u32 = {}; - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; + if (!tc_should_offload(dev, tp, 0)) + return; - if (tc_should_offload(dev, tp, 0)) { - offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_DELETE_HNODE; + cls_u32.hnode.divisor = h->divisor; + cls_u32.hnode.handle = h->handle; + cls_u32.hnode.prio = h->prio; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); - } + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; + struct tc_cls_u32_offload cls_u32 = {}; int err; - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - if (!tc_should_offload(dev, tp, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; - offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; - offload.cls_u32->knode.handle = n->handle; - offload.cls_u32->knode.fshift = n->fshift; + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_REPLACE_KNODE; + cls_u32.knode.handle = n->handle; + cls_u32.knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK - offload.cls_u32->knode.val = n->val; - offload.cls_u32->knode.mask = n->mask; + cls_u32.knode.val = n->val; + cls_u32.knode.mask = n->mask; #else - offload.cls_u32->knode.val = 0; - offload.cls_u32->knode.mask = 0; + cls_u32.knode.val = 0; + cls_u32.knode.mask = 0; #endif - offload.cls_u32->knode.sel = &n->sel; - offload.cls_u32->knode.exts = &n->exts; + cls_u32.knode.sel = &n->sel; + cls_u32.knode.exts = &n->exts; if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; + cls_u32.knode.link_handle = n->ht_down->handle; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); if (!err) n->flags |= TCA_CLS_FLAGS_IN_HW; @@ -622,9 +605,9 @@ static void u32_destroy(struct tcf_proto *tp) tp->data = NULL; } -static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int u32_delete(struct tcf_proto *tp, void *arg, bool *last) { - struct tc_u_hnode *ht = (struct tc_u_hnode *)arg; + struct tc_u_hnode *ht = arg; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); struct tc_u_common *tp_c = tp->data; int ret = 0; @@ -723,29 +706,24 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = -EINVAL; if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; if (TC_U32_KEY(handle)) - goto errout; + return -EINVAL; if (handle) { ht_down = u32_lookup_ht(ht->tp_c, handle); if (ht_down == NULL) - goto errout; + return -EINVAL; ht_down->refcnt++; } @@ -765,16 +743,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, int ret; ret = tcf_change_indev(net, tb[TCA_U32_INDEV]); if (ret < 0) - goto errout; + return -EINVAL; n->ifindex = ret; } #endif - tcf_exts_change(tp, &n->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, @@ -858,7 +831,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -885,7 +858,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - n = (struct tc_u_knode *)*arg; + n = *arg; if (n) { struct tc_u_knode *new; @@ -952,7 +925,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); - *arg = (unsigned long)ht; + *arg = ht; return 0; } @@ -1047,7 +1020,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); - *arg = (unsigned long)n; + *arg = n; return 0; } @@ -1081,7 +1054,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (ht->prio != tp->prio) continue; if (arg->count >= arg->skip) { - if (arg->fn(tp, (unsigned long)ht, arg) < 0) { + if (arg->fn(tp, ht, arg) < 0) { arg->stop = 1; return; } @@ -1095,7 +1068,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)n, arg) < 0) { + if (arg->fn(tp, n, arg) < 0) { arg->stop = 1; return; } @@ -1105,10 +1078,10 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct tc_u_knode *n = (struct tc_u_knode *)fh; + struct tc_u_knode *n = fh; struct tc_u_hnode *ht_up, *ht_down; struct nlattr *nest; @@ -1122,7 +1095,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; if (TC_U32_KEY(n->handle) == 0) { - struct tc_u_hnode *ht = (struct tc_u_hnode *)fh; + struct tc_u_hnode *ht = fh; u32 divisor = ht->divisor + 1; if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor)) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bd24a550e0f9..816c8092e601 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1952,14 +1952,14 @@ static int __init pktsched_init(void) register_qdisc(&mq_qdisc_ops); register_qdisc(&noqueue_qdisc_ops); - rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, - NULL); - rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); + 0); + rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, - NULL); + 0); return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 572fe2584e48..0af4b1c6f674 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -41,6 +41,7 @@ #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) struct atm_flow_data { + struct Qdisc_class_common common; struct Qdisc *q; /* FIFO, TBF, etc. */ struct tcf_proto __rcu *filter_list; struct tcf_block *block; @@ -49,7 +50,6 @@ struct atm_flow_data { struct sk_buff *skb); /* chaining */ struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ - u32 classid; /* x:y type ID */ int ref; /* reference count */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; @@ -75,7 +75,7 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) struct atm_flow_data *flow; list_for_each_entry(flow, &p->flows, list) { - if (flow->classid == classid) + if (flow->common.classid == classid) return flow; } return NULL; @@ -293,7 +293,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, flow->old_pop = flow->vcc->pop; flow->parent = p; flow->vcc->pop = sch_atm_pop; - flow->classid = classid; + flow->common.classid = classid; flow->ref = 1; flow->excess = excess; list_add(&flow->list, &p->link.list); @@ -549,7 +549,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) p->link.vcc = NULL; p->link.sock = NULL; - p->link.classid = sch->handle; + p->link.common.classid = sch->handle; p->link.ref = 1; tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; @@ -594,7 +594,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, sch, p, flow, skb, tcm); if (list_empty(&flow->list)) return -EINVAL; - tcm->tcm_handle = flow->classid; + tcm->tcm_handle = flow->common.classid; tcm->tcm_info = flow->q->handle; nest = nla_nest_start(skb, TCA_OPTIONS); @@ -619,7 +619,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, goto nla_put_failure; } if (flow->excess) { - if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->classid)) + if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) goto nla_put_failure; } else { if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index e0c02725cd48..2165a05994b7 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -39,11 +39,9 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { - struct tc_mqprio_qopt offload = { 0 }; - struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, - { .mqprio = &offload } }; + struct tc_mqprio_qopt mqprio = {}; - dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc); + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio); } else { netdev_set_num_tc(dev, 0); } @@ -148,16 +146,14 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { - struct tc_mqprio_qopt offload = *qopt; - struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, - { .mqprio = &offload } }; + struct tc_mqprio_qopt mqprio = *qopt; - err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, - 0, 0, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, + &mqprio); if (err) return err; - priv->hw_offload = offload.hw; + priv->hw_offload = mqprio.hw; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 40ec83679d6e..dfb9651e818b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -63,11 +63,11 @@ static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc); /* 1st Level Abstractions. */ /* Initialize a new association from provided memory. */ -static struct sctp_association *sctp_association_init(struct sctp_association *asoc, - const struct sctp_endpoint *ep, - const struct sock *sk, - sctp_scope_t scope, - gfp_t gfp) +static struct sctp_association *sctp_association_init( + struct sctp_association *asoc, + const struct sctp_endpoint *ep, + const struct sock *sk, + enum sctp_scope scope, gfp_t gfp) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -301,9 +301,8 @@ fail_init: /* Allocate and initialize a new association */ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, - const struct sock *sk, - sctp_scope_t scope, - gfp_t gfp) + const struct sock *sk, + enum sctp_scope scope, gfp_t gfp) { struct sctp_association *asoc; @@ -797,7 +796,7 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc, */ void sctp_assoc_control_transport(struct sctp_association *asoc, struct sctp_transport *transport, - sctp_transport_cmd_t command, + enum sctp_transport_cmd command, sctp_sn_error_t error) { struct sctp_ulpevent *event; @@ -1022,11 +1021,11 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) container_of(work, struct sctp_association, base.inqueue.immediate); struct net *net = sock_net(asoc->base.sk); + union sctp_subtype subtype; struct sctp_endpoint *ep; struct sctp_chunk *chunk; struct sctp_inq *inqueue; int state; - sctp_subtype_t subtype; int error = 0; /* The association should be held so we should be safe. */ @@ -1564,7 +1563,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) * local endpoint and the remote peer. */ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, - sctp_scope_t scope, gfp_t gfp) + enum sctp_scope scope, gfp_t gfp) { int flags; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index e001b01b0e68..00667c50efa7 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, * are called the two key vectors. */ static struct sctp_auth_bytes *sctp_auth_make_key_vector( - sctp_random_param_t *random, - sctp_chunks_param_t *chunks, - sctp_hmac_algo_param_t *hmacs, + struct sctp_random_param *random, + struct sctp_chunks_param *chunks, + struct sctp_hmac_algo_param *hmacs, gfp_t gfp) { struct sctp_auth_bytes *new; @@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector( gfp_t gfp) { return sctp_auth_make_key_vector( - (sctp_random_param_t *)asoc->c.auth_random, - (sctp_chunks_param_t *)asoc->c.auth_chunks, - (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, - gfp); + (struct sctp_random_param *)asoc->c.auth_random, + (struct sctp_chunks_param *)asoc->c.auth_chunks, + (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp); } /* Make a key vector based on peer's parameters */ diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 1ebc184a0e23..7df3704982f5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -45,9 +45,9 @@ #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ -static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *, - union sctp_addr *, sctp_scope_t scope, gfp_t gfp, - int flags); +static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, + union sctp_addr *addr, enum sctp_scope scope, + gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); /* First Level Abstractions. */ @@ -57,7 +57,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); */ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, gfp_t gfp, + enum sctp_scope scope, gfp_t gfp, int flags) { struct sctp_sockaddr_entry *addr; @@ -440,9 +440,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, /* Copy out addresses from the global local address list. */ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, - union sctp_addr *addr, - sctp_scope_t scope, gfp_t gfp, - int flags) + union sctp_addr *addr, enum sctp_scope scope, + gfp_t gfp, int flags) { int error = 0; @@ -485,9 +484,10 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr) } /* Is 'addr' valid for 'scope'? */ -int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope) +int sctp_in_scope(struct net *net, const union sctp_addr *addr, + enum sctp_scope scope) { - sctp_scope_t addr_scope = sctp_scope(addr); + enum sctp_scope addr_scope = sctp_scope(addr); /* The unusable SCTP addresses will not be considered with * any defined scopes. @@ -545,7 +545,7 @@ int sctp_is_ep_boundall(struct sock *sk) ********************************************************************/ /* What is the scope of 'addr'? */ -sctp_scope_t sctp_scope(const union sctp_addr *addr) +enum sctp_scope sctp_scope(const union sctp_addr *addr) { struct sctp_af *af; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 1323d41e68b8..3afac275ee82 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -201,7 +201,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) - max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) + + max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) + hmac_desc->hmac_len); } @@ -221,7 +221,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, asoc->outqueue.out_qlen == 0 && list_empty(&asoc->outqueue.retransmit) && msg_len > max_data) - first_len -= SCTP_PAD4(sizeof(sctp_sack_chunk_t)); + first_len -= SCTP_PAD4(sizeof(struct sctp_sack_chunk)); /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 2e47eb2f05cb..3f619fdcbf0a 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -60,7 +60,7 @@ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { }; /* Lookup "chunk type" debug name. */ -const char *sctp_cname(const sctp_subtype_t cid) +const char *sctp_cname(const union sctp_subtype cid) { if (cid.chunk <= SCTP_CID_BASE_MAX) return sctp_cid_tbl[cid.chunk]; @@ -130,7 +130,7 @@ static const char *const sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = { }; /* Lookup primitive debug name. */ -const char *sctp_pname(const sctp_subtype_t id) +const char *sctp_pname(const union sctp_subtype id) { if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX) return sctp_primitive_tbl[id.primitive]; @@ -143,7 +143,7 @@ static const char *const sctp_other_tbl[] = { }; /* Lookup "other" debug name. */ -const char *sctp_oname(const sctp_subtype_t id) +const char *sctp_oname(const union sctp_subtype id) { if (id.other <= SCTP_EVENT_OTHER_MAX) return sctp_other_tbl[id.other]; @@ -165,7 +165,7 @@ static const char *const sctp_timer_tbl[] = { }; /* Lookup timer debug name. */ -const char *sctp_tname(const sctp_subtype_t id) +const char *sctp_tname(const union sctp_subtype id) { BUILD_BUG_ON(SCTP_EVENT_TIMEOUT_MAX + 1 != ARRAY_SIZE(sctp_timer_tbl)); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 0e86f988f836..ee1e601a0b11 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. */ - auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) + - sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); + auth_hmacs = kzalloc(sizeof(*auth_hmacs) + + sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); if (!auth_hmacs) goto nomem; - auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) + - SCTP_NUM_CHUNK_TYPES, gfp); + auth_chunks = kzalloc(sizeof(*auth_chunks) + + SCTP_NUM_CHUNK_TYPES, gfp); if (!auth_chunks) goto nomem; @@ -382,8 +382,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) struct sctp_transport *transport; struct sctp_chunk *chunk; struct sctp_inq *inqueue; - sctp_subtype_t subtype; - sctp_state_t state; + union sctp_subtype subtype; + enum sctp_state state; int error = 0; int first_time = 1; /* is this the first time through the loop */ diff --git a/net/sctp/input.c b/net/sctp/input.c index 41eb2ec10460..92a07141fd07 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1111,7 +1111,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( __be16 peer_port, struct sctp_transport **transportp) { - sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch; + struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch; struct sctp_af *af; union sctp_addr_param *param; union sctp_addr paddr; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2a186b201ad2..a2a1c1d08d51 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -243,8 +243,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; + enum sctp_scope scope; __u8 matchlen = 0; - sctp_scope_t scope; memset(fl6, 0, sizeof(struct flowi6)); fl6->daddr = daddr->v6.sin6_addr; @@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr, static int sctp_v6_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { - int length = sizeof(sctp_ipv6addr_param_t); + int length = sizeof(struct sctp_ipv6addr_param); param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS; param->v6.param_hdr.length = htons(length); @@ -624,10 +624,10 @@ static int sctp_v6_addr_valid(union sctp_addr *addr, } /* What is the scope of 'addr'? */ -static sctp_scope_t sctp_v6_scope(union sctp_addr *addr) +static enum sctp_scope sctp_v6_scope(union sctp_addr *addr) { + enum sctp_scope retval; int v6scope; - sctp_scope_t retval; /* The IPv6 scope is really a set of bit fields. * See IFA_* in <net/if_inet6.h>. Map to a generic SCTP scope. diff --git a/net/sctp/output.c b/net/sctp/output.c index 9d8504985744..4a865cd06d76 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -57,15 +57,15 @@ #include <net/sctp/checksum.h> /* Forward declarations for private helpers. */ -static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk); -static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, - struct sctp_chunk *chunk); +static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk); +static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk); static void sctp_packet_append_data(struct sctp_packet *packet, - struct sctp_chunk *chunk); -static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, - struct sctp_chunk *chunk, - u16 chunk_len); + struct sctp_chunk *chunk); +static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, + struct sctp_chunk *chunk, + u16 chunk_len); static void sctp_packet_reset(struct sctp_packet *packet) { @@ -181,11 +181,11 @@ void sctp_packet_free(struct sctp_packet *packet) * as it can fit in the packet, but any more data that does not fit in this * packet can be sent only after receiving the COOKIE_ACK. */ -sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk, - int one_packet, gfp_t gfp) +enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk, + int one_packet, gfp_t gfp) { - sctp_xmit_t retval; + enum sctp_xmit retval; pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); @@ -218,12 +218,12 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, } /* Try to bundle an auth chunk into the packet. */ -static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, - struct sctp_chunk *chunk) +static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt, + struct sctp_chunk *chunk) { struct sctp_association *asoc = pkt->transport->asoc; + enum sctp_xmit retval = SCTP_XMIT_OK; struct sctp_chunk *auth; - sctp_xmit_t retval = SCTP_XMIT_OK; /* if we don't have an association, we can't do authentication */ if (!asoc) @@ -254,10 +254,10 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, } /* Try to bundle a SACK with the packet. */ -static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, - struct sctp_chunk *chunk) +static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt, + struct sctp_chunk *chunk) { - sctp_xmit_t retval = SCTP_XMIT_OK; + enum sctp_xmit retval = SCTP_XMIT_OK; /* If sending DATA and haven't aleady bundled a SACK, try to * bundle one in to the packet. @@ -299,11 +299,11 @@ out: /* Append a chunk to the offered packet reporting back any inability to do * so. */ -static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk) +static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk) { - sctp_xmit_t retval = SCTP_XMIT_OK; __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); + enum sctp_xmit retval = SCTP_XMIT_OK; /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); @@ -353,10 +353,10 @@ finish: /* Append a chunk to the offered packet reporting back any inability to do * so. */ -sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk) +enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk) { - sctp_xmit_t retval = SCTP_XMIT_OK; + enum sctp_xmit retval = SCTP_XMIT_OK; pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); @@ -653,8 +653,8 @@ out: ********************************************************************/ /* This private function check to see if a chunk can be added */ -static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, - struct sctp_chunk *chunk) +static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk) { size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; @@ -762,12 +762,12 @@ static void sctp_packet_append_data(struct sctp_packet *packet, sctp_chunk_assign_ssn(chunk); } -static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, - struct sctp_chunk *chunk, - u16 chunk_len) +static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, + struct sctp_chunk *chunk, + u16 chunk_len) { + enum sctp_xmit retval = SCTP_XMIT_OK; size_t psize, pmtu, maxsize; - sctp_xmit_t retval = SCTP_XMIT_OK; psize = packet->size; if (packet->transport->asoc) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e8762702a313..2966ff400755 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -534,7 +534,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * one packet out. */ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, - sctp_retransmit_reason_t reason) + enum sctp_retransmit_reason reason) { struct net *net = sock_net(q->asoc->base.sk); @@ -594,14 +594,14 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int rtx_timeout, int *start_timer) { - struct list_head *lqueue; struct sctp_transport *transport = pkt->transport; - sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; - int fast_rtx; + struct list_head *lqueue; + enum sctp_xmit status; int error = 0; int timer = 0; int done = 0; + int fast_rtx; lqueue = &q->retransmit; fast_rtx = q->fast_rtx; @@ -781,7 +781,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) struct sctp_transport *transport = NULL; struct sctp_transport *new_transport; struct sctp_chunk *chunk, *tmp; - sctp_xmit_t status; + enum sctp_xmit status; int error = 0; int start_timer = 0; int one_packet = 0; @@ -1197,7 +1197,7 @@ sctp_flush_out: static void sctp_sack_update_unack_data(struct sctp_association *assoc, struct sctp_sackhdr *sack) { - sctp_sack_variable_t *frags; + union sctp_sack_variable *frags; __u16 unack_data; int i; @@ -1224,7 +1224,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) struct sctp_transport *transport; struct sctp_chunk *tchunk = NULL; struct list_head *lchunk, *transport_list, *temp; - sctp_sack_variable_t *frags = sack->variable; + union sctp_sack_variable *frags = sack->variable; __u32 sack_ctsn, ctsn, tsn; __u32 highest_tsn, highest_new_tsn; __u32 sack_a_rwnd; @@ -1736,10 +1736,10 @@ static void sctp_mark_missing(struct sctp_outq *q, /* Is the given TSN acked by this packet? */ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) { - int i; - sctp_sack_variable_t *frags; - __u16 tsn_offset, blocks; __u32 ctsn = ntohl(sack->cum_tsn_ack); + union sctp_sack_variable *frags; + __u16 tsn_offset, blocks; + int i; if (TSN_lte(tsn, ctsn)) goto pass; diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index f0553a022859..c0817f7a8964 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -53,8 +53,8 @@ int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \ void *arg) { \ int error = 0; \ - sctp_event_t event_type; sctp_subtype_t subtype; \ - sctp_state_t state; \ + enum sctp_event event_type; union sctp_subtype subtype; \ + enum sctp_state state; \ struct sctp_endpoint *ep; \ \ event_type = SCTP_EVENT_T_PRIMITIVE; \ diff --git a/net/sctp/probe.c b/net/sctp/probe.c index 6cc2152e0740..43837dfc86a7 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -130,7 +130,7 @@ static const struct file_operations sctpprobe_fops = { static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 989a900383b5..fcd80feb293f 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -196,7 +196,7 @@ static void sctp_free_local_addr_list(struct net *net) /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, - sctp_scope_t scope, gfp_t gfp, int copy_flags) + enum sctp_scope scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; union sctp_addr laddr; @@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr, static int sctp_v4_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { - int length = sizeof(sctp_ipv4addr_param_t); + int length = sizeof(struct sctp_ipv4addr_param); param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS; param->v4.param_hdr.length = htons(length); @@ -400,9 +400,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) * IPv4 scoping can be controlled through sysctl option * net.sctp.addr_scope_policy */ -static sctp_scope_t sctp_v4_scope(union sctp_addr *addr) +static enum sctp_scope sctp_v4_scope(union sctp_addr *addr) { - sctp_scope_t retval; + enum sctp_scope retval; /* Check for unusable SCTP addresses. */ if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 6110447fe51d..3a8fb1dffbc1 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -69,7 +69,8 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen, gfp_t gfp); -static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, +static struct sctp_cookie_param *sctp_pack_cookie( + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, int *cookie_len, @@ -134,14 +135,14 @@ static const struct sctp_paramhdr prsctp_param = { void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, size_t paylen) { - sctp_errhdr_t err; + struct sctp_errhdr err; __u16 len; /* Cause code constants are now defined in network order. */ err.cause = cause_code; - len = sizeof(sctp_errhdr_t) + paylen; + len = sizeof(err) + paylen; err.length = htons(len); - chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); + chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err); } /* A helper to initialize an op error inside a @@ -152,19 +153,19 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, size_t paylen) { - sctp_errhdr_t err; + struct sctp_errhdr err; __u16 len; /* Cause code constants are now defined in network order. */ err.cause = cause_code; - len = sizeof(sctp_errhdr_t) + paylen; + len = sizeof(err) + paylen; err.length = htons(len); if (skb_tailroom(chunk->skb) < len) return -ENOSPC; - chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, - sizeof(sctp_errhdr_t), - &err); + + chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err); + return 0; } /* 3.3.2 Initiation (INIT) (1) @@ -223,10 +224,10 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, struct sctp_chunk *retval = NULL; int num_types, addrs_len = 0; struct sctp_sock *sp; - sctp_supported_addrs_param_t sat; + struct sctp_supported_addrs_param sat; __be16 types[2]; - sctp_adaptation_ind_param_t aiparam; - sctp_supported_ext_param_t ext_param; + struct sctp_adaptation_ind_param aiparam; + struct sctp_supported_ext_param ext_param; int num_ext = 0; __u8 extensions[4]; struct sctp_paramhdr *auth_chunks = NULL, @@ -305,8 +306,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -348,10 +348,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, */ if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; - ext_param.param_hdr.length = - htons(sizeof(sctp_supported_ext_param_t) + num_ext); - sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), - &ext_param); + ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); + sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } @@ -390,11 +388,11 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, union sctp_params addrs; struct sctp_sock *sp; int addrs_len; - sctp_cookie_param_t *cookie; + struct sctp_cookie_param *cookie; int cookie_len; size_t chunksize; - sctp_adaptation_ind_param_t aiparam; - sctp_supported_ext_param_t ext_param; + struct sctp_adaptation_ind_param aiparam; + struct sctp_supported_ext_param ext_param; int num_ext = 0; __u8 extensions[4]; struct sctp_paramhdr *auth_chunks = NULL, @@ -468,8 +466,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -495,10 +492,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; - ext_param.param_hdr.length = - htons(sizeof(sctp_supported_ext_param_t) + num_ext); - sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), - &ext_param); + ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); + sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } if (asoc->peer.prsctp_capable) @@ -668,11 +663,11 @@ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_chunk *retval; - sctp_cwrhdr_t cwr; + struct sctp_cwrhdr cwr; cwr.lowest_tsn = htonl(lowest_tsn); retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0, - sizeof(sctp_cwrhdr_t), GFP_ATOMIC); + sizeof(cwr), GFP_ATOMIC); if (!retval) goto nodata; @@ -702,11 +697,11 @@ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn) { struct sctp_chunk *retval; - sctp_ecnehdr_t ecne; + struct sctp_ecnehdr ecne; ecne.lowest_tsn = htonl(lowest_tsn); retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0, - sizeof(sctp_ecnehdr_t), GFP_ATOMIC); + sizeof(ecne), GFP_ATOMIC); if (!retval) goto nodata; retval->subh.ecne_hdr = @@ -862,15 +857,15 @@ nodata: struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk) { + struct sctp_shutdownhdr shut; struct sctp_chunk *retval; - sctp_shutdownhdr_t shut; __u32 ctsn; ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); shut.cum_tsn_ack = htonl(ctsn); retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, - sizeof(sctp_shutdownhdr_t), GFP_ATOMIC); + sizeof(shut), GFP_ATOMIC); if (!retval) goto nodata; @@ -984,8 +979,8 @@ struct sctp_chunk *sctp_make_abort_no_data( struct sctp_chunk *retval; __be32 payload; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) - + sizeof(tsn)); + retval = sctp_make_abort(asoc, chunk, + sizeof(struct sctp_errhdr) + sizeof(tsn)); if (!retval) goto no_mem; @@ -1020,7 +1015,8 @@ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, void *payload = NULL; int err; - retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen); + retval = sctp_make_abort(asoc, NULL, + sizeof(struct sctp_errhdr) + paylen); if (!retval) goto err_chunk; @@ -1085,8 +1081,8 @@ struct sctp_chunk *sctp_make_abort_violation( struct sctp_chunk *retval; struct sctp_paramhdr phdr; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen + - sizeof(phdr)); + retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) + + paylen + sizeof(phdr)); if (!retval) goto end; @@ -1109,7 +1105,7 @@ struct sctp_chunk *sctp_make_violation_paramlen( { struct sctp_chunk *retval; static const char error[] = "The following parameter had invalid length:"; - size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + + size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) + sizeof(*param); retval = sctp_make_abort(asoc, chunk, payload_len); @@ -1131,7 +1127,7 @@ struct sctp_chunk *sctp_make_violation_max_retrans( { struct sctp_chunk *retval; static const char error[] = "Association exceeded its max_retans count"; - size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t); + size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr); retval = sctp_make_abort(asoc, chunk, payload_len); if (!retval) @@ -1214,7 +1210,8 @@ static struct sctp_chunk *sctp_make_op_error_space( struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0, - sizeof(sctp_errhdr_t) + size, GFP_ATOMIC); + sizeof(struct sctp_errhdr) + size, + GFP_ATOMIC); if (!retval) goto nodata; @@ -1285,16 +1282,16 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) return NULL; retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0, - hmac_desc->hmac_len + sizeof(sctp_authhdr_t), - GFP_ATOMIC); + hmac_desc->hmac_len + sizeof(auth_hdr), + GFP_ATOMIC); if (!retval) return NULL; auth_hdr.hmac_id = htons(hmac_desc->hmac_id); auth_hdr.shkey_id = htons(asoc->active_key_id); - retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t), - &auth_hdr); + retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr), + &auth_hdr); hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len); @@ -1581,8 +1578,8 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, gfp_t gfp) { struct sctp_association *asoc; + enum sctp_scope scope; struct sk_buff *skb; - sctp_scope_t scope; /* Create the bare association. */ scope = sctp_scope(sctp_source(chunk)); @@ -1601,14 +1598,15 @@ nodata: /* Build a cookie representing asoc. * This INCLUDES the param header needed to put the cookie in the INIT ACK. */ -static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const struct sctp_chunk *init_chunk, - int *cookie_len, - const __u8 *raw_addrs, int addrs_len) +static struct sctp_cookie_param *sctp_pack_cookie( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const struct sctp_chunk *init_chunk, + int *cookie_len, + const __u8 *raw_addrs, int addrs_len) { - sctp_cookie_param_t *retval; struct sctp_signed_cookie *cookie; + struct sctp_cookie_param *retval; int headersize, bodysize; /* Header size is static data prior to the actual cookie, including @@ -1703,7 +1701,7 @@ struct sctp_association *sctp_unpack_cookie( int headersize, bodysize, fixed_size; __u8 *digest = ep->digest; unsigned int len; - sctp_scope_t scope; + enum sctp_scope scope; struct sk_buff *skb = chunk->skb; ktime_t kt; @@ -2067,10 +2065,11 @@ static void sctp_process_ext_param(struct sctp_association *asoc, * SCTP_IERROR_ERROR - stop and report an error. * SCTP_IERROR_NOMEME - out of memory. */ -static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, - union sctp_params param, - struct sctp_chunk *chunk, - struct sctp_chunk **errp) +static enum sctp_ierror sctp_process_unk_param( + const struct sctp_association *asoc, + union sctp_params param, + struct sctp_chunk *chunk, + struct sctp_chunk **errp) { int retval = SCTP_IERROR_NO_ERROR; @@ -2119,13 +2118,13 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * SCTP_IERROR_ERROR - stop processing, trigger an ERROR * SCTP_IERROR_NO_ERROR - continue with the chunk */ -static sctp_ierror_t sctp_verify_param(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - union sctp_params param, - enum sctp_cid cid, - struct sctp_chunk *chunk, - struct sctp_chunk **err_chunk) +static enum sctp_ierror sctp_verify_param(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + union sctp_params param, + enum sctp_cid cid, + struct sctp_chunk *chunk, + struct sctp_chunk **err_chunk) { struct sctp_hmac_algo_param *hmacs; int retval = SCTP_IERROR_NO_ERROR; @@ -2504,7 +2503,7 @@ static int sctp_process_param(struct sctp_association *asoc, int i; __u16 sat; int retval = 1; - sctp_scope_t scope; + enum sctp_scope scope; u32 stale; struct sctp_af *af; union sctp_addr_param *addr_param; @@ -2617,7 +2616,7 @@ do_addr_param: if (!net->sctp.addip_enable) goto fall_through; - addr_param = param.v + sizeof(sctp_addip_param_t); + addr_param = param.v + sizeof(struct sctp_addip_param); af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (af == NULL) @@ -2754,7 +2753,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc, union sctp_addr *addr, int vparam_len) { - sctp_addiphdr_t asconf; + struct sctp_addiphdr asconf; struct sctp_chunk *retval; int length = sizeof(asconf) + vparam_len; union sctp_addr_param addrparam; @@ -2812,7 +2811,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, int addrcnt, __be16 flags) { - sctp_addip_param_t param; + struct sctp_addip_param param; struct sctp_chunk *retval; union sctp_addr_param addr_param; union sctp_addr *addr; @@ -2898,7 +2897,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr) { - sctp_addip_param_t param; + struct sctp_addip_param param; struct sctp_chunk *retval; int len = sizeof(param); union sctp_addr_param addrparam; @@ -2947,7 +2946,7 @@ struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc, __u32 serial, int vparam_len) { - sctp_addiphdr_t asconf; + struct sctp_addiphdr asconf; struct sctp_chunk *retval; int length = sizeof(asconf) + vparam_len; @@ -2967,10 +2966,11 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as /* Add response parameters to an ASCONF_ACK chunk. */ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id, - __be16 err_code, sctp_addip_param_t *asconf_param) + __be16 err_code, + struct sctp_addip_param *asconf_param) { - sctp_addip_param_t ack_param; - sctp_errhdr_t err_param; + struct sctp_addip_param ack_param; + struct sctp_errhdr err_param; int asconf_param_len = 0; int err_param_len = 0; __be16 response_type; @@ -3008,15 +3008,15 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id, /* Process a asconf parameter. */ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, - struct sctp_chunk *asconf, - sctp_addip_param_t *asconf_param) + struct sctp_chunk *asconf, + struct sctp_addip_param *asconf_param) { struct sctp_transport *peer; struct sctp_af *af; union sctp_addr addr; union sctp_addr_param *addr_param; - addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); + addr_param = (void *)asconf_param + sizeof(*asconf_param); if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP && asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP && @@ -3141,10 +3141,11 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, bool addr_param_needed, struct sctp_paramhdr **errp) { - sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr; + struct sctp_addip_chunk *addip; union sctp_params param; bool addr_param_seen = false; + addip = (struct sctp_addip_chunk *)chunk->chunk_hdr; sctp_walk_params(param, addip, addip_hdr.params) { size_t length = ntohs(param.p->length); @@ -3153,7 +3154,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, case SCTP_PARAM_ERR_CAUSE: break; case SCTP_PARAM_IPV4_ADDRESS: - if (length != sizeof(sctp_ipv4addr_param_t)) + if (length != sizeof(struct sctp_ipv4addr_param)) return false; /* ensure there is only one addr param and it's in the * beginning of addip_hdr params, or we reject it. @@ -3163,7 +3164,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, addr_param_seen = true; break; case SCTP_PARAM_IPV6_ADDRESS: - if (length != sizeof(sctp_ipv6addr_param_t)) + if (length != sizeof(struct sctp_ipv6addr_param)) return false; if (param.v != addip->addip_hdr.params) return false; @@ -3176,13 +3177,13 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, if (addr_param_needed && !addr_param_seen) return false; length = ntohs(param.addip->param_hdr.length); - if (length < sizeof(sctp_addip_param_t) + + if (length < sizeof(struct sctp_addip_param) + sizeof(**errp)) return false; break; case SCTP_PARAM_SUCCESS_REPORT: case SCTP_PARAM_ADAPTATION_LAYER_IND: - if (length != sizeof(sctp_addip_param_t)) + if (length != sizeof(struct sctp_addip_param)) return false; break; default: @@ -3208,10 +3209,10 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf) { - sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr; + struct sctp_addip_chunk *addip; bool all_param_pass = true; union sctp_params param; - sctp_addiphdr_t *hdr; + struct sctp_addiphdr *hdr; union sctp_addr_param *addr_param; struct sctp_chunk *asconf_ack; __be16 err_code; @@ -3219,13 +3220,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, int chunk_len; __u32 serial; + addip = (struct sctp_addip_chunk *)asconf->chunk_hdr; chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); - hdr = (sctp_addiphdr_t *)asconf->skb->data; + hdr = (struct sctp_addiphdr *)asconf->skb->data; serial = ntohl(hdr->serial); /* Skip the addiphdr and store a pointer to address parameter. */ - length = sizeof(sctp_addiphdr_t); + length = sizeof(*hdr); addr_param = (union sctp_addr_param *)(asconf->skb->data + length); chunk_len -= length; @@ -3291,7 +3293,7 @@ done: /* Process a asconf parameter that is successfully acked. */ static void sctp_asconf_param_success(struct sctp_association *asoc, - sctp_addip_param_t *asconf_param) + struct sctp_addip_param *asconf_param) { struct sctp_af *af; union sctp_addr addr; @@ -3300,7 +3302,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, struct sctp_transport *transport; struct sctp_sockaddr_entry *saddr; - addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); + addr_param = (void *)asconf_param + sizeof(*asconf_param); /* We have checked the packet before, so we do not check again. */ af = sctp_get_af_specific(param_type2af(addr_param->p.type)); @@ -3351,11 +3353,11 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, * specific success indication is present for the parameter. */ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, - sctp_addip_param_t *asconf_param, - int no_err) + struct sctp_addip_param *asconf_param, + int no_err) { - sctp_addip_param_t *asconf_ack_param; - sctp_errhdr_t *err_param; + struct sctp_addip_param *asconf_ack_param; + struct sctp_errhdr *err_param; int length; int asconf_ack_len; __be16 err_code; @@ -3371,9 +3373,9 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, /* Skip the addiphdr from the asconf_ack chunk and store a pointer to * the first asconf_ack parameter. */ - length = sizeof(sctp_addiphdr_t); - asconf_ack_param = (sctp_addip_param_t *)(asconf_ack->skb->data + - length); + length = sizeof(struct sctp_addiphdr); + asconf_ack_param = (struct sctp_addip_param *)(asconf_ack->skb->data + + length); asconf_ack_len -= length; while (asconf_ack_len > 0) { @@ -3382,7 +3384,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, case SCTP_PARAM_SUCCESS_REPORT: return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: - length = sizeof(sctp_addip_param_t); + length = sizeof(*asconf_ack_param); err_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; if (asconf_ack_len > 0) @@ -3409,7 +3411,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, { struct sctp_chunk *asconf = asoc->addip_last_asconf; union sctp_addr_param *addr_param; - sctp_addip_param_t *asconf_param; + struct sctp_addip_param *asconf_param; int length = 0; int asconf_len = asconf->skb->len; int all_param_pass = 0; @@ -3420,7 +3422,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, /* Skip the chunkhdr and addiphdr from the last asconf sent and store * a pointer to address parameter. */ - length = sizeof(sctp_addip_chunk_t); + length = sizeof(struct sctp_addip_chunk); addr_param = (union sctp_addr_param *)(asconf->skb->data + length); asconf_len -= length; @@ -3436,7 +3438,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, * failures are indicated, then all request(s) are considered * successful. */ - if (asconf_ack->skb->len == sizeof(sctp_addiphdr_t)) + if (asconf_ack->skb->len == sizeof(struct sctp_addiphdr)) all_param_pass = 1; /* Process the TLVs contained in the last sent ASCONF chunk. */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d6e5e9e0fd6d..4a12d29d9aa1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -51,17 +51,18 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -static int sctp_cmd_interpreter(sctp_event_t event_type, - sctp_subtype_t subtype, - sctp_state_t state, +static int sctp_cmd_interpreter(enum sctp_event event_type, + union sctp_subtype subtype, + enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, gfp_t gfp); -static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, +static int sctp_side_effects(enum sctp_event event_type, + union sctp_subtype subtype, + enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association **asoc, void *event_arg, @@ -280,7 +281,7 @@ out_unlock: * for timeouts which use the association as their parameter. */ static void sctp_generate_timeout_event(struct sctp_association *asoc, - sctp_event_timeout_t timeout_type) + enum sctp_event_timeout timeout_type) { struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); @@ -602,8 +603,8 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands, /* Worker routine to handle SCTP_CMD_ASSOC_FAILED. */ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands, struct sctp_association *asoc, - sctp_event_t event_type, - sctp_subtype_t subtype, + enum sctp_event event_type, + union sctp_subtype subtype, struct sctp_chunk *chunk, unsigned int error) { @@ -828,7 +829,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds, if (!sctp_assoc_update(asoc, new)) return; - abort = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t)); + abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr)); if (abort) { sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); @@ -843,7 +844,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds, /* Helper function to change the state of an association. */ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, - sctp_state_t state) + enum sctp_state state) { struct sock *sk = asoc->base.sk; @@ -1052,8 +1053,8 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands, static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, - sctp_event_timeout_t timer, - char *name) + enum sctp_event_timeout timer, + char *name) { struct sctp_transport *t; @@ -1139,18 +1140,16 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) * If you want to understand all of lksctp, this is a * good place to start. */ -int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, - struct sctp_endpoint *ep, - struct sctp_association *asoc, - void *event_arg, - gfp_t gfp) +int sctp_do_sm(struct net *net, enum sctp_event event_type, + union sctp_subtype subtype, enum sctp_state state, + struct sctp_endpoint *ep, struct sctp_association *asoc, + void *event_arg, gfp_t gfp) { sctp_cmd_seq_t commands; const sctp_sm_table_entry_t *state_fn; sctp_disposition_t status; int error = 0; - typedef const char *(printfn_t)(sctp_subtype_t); + typedef const char *(printfn_t)(union sctp_subtype); static printfn_t *table[] = { NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname, }; @@ -1178,8 +1177,9 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, /***************************************************************** * This the master state function side effect processing function. *****************************************************************/ -static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, +static int sctp_side_effects(enum sctp_event event_type, + union sctp_subtype subtype, + enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association **asoc, void *event_arg, @@ -1263,9 +1263,9 @@ bail: ********************************************************************/ /* This is the side-effect interpreter. */ -static int sctp_cmd_interpreter(sctp_event_t event_type, - sctp_subtype_t subtype, - sctp_state_t state, +static int sctp_cmd_interpreter(enum sctp_event event_type, + union sctp_subtype subtype, + enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b2a74c3823ee..ac6aaa046529 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -80,19 +80,19 @@ static void sctp_send_stale_cookie_err(struct net *net, static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); @@ -116,7 +116,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands); @@ -124,7 +124,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, void *ext, sctp_cmd_seq_t *commands); @@ -132,7 +132,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands); @@ -140,20 +140,21 @@ static sctp_disposition_t sctp_sf_violation_chunk( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands); -static sctp_ierror_t sctp_sf_authenticate(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - struct sctp_chunk *chunk); +static enum sctp_ierror sctp_sf_authenticate( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_chunk *chunk); static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands); @@ -216,7 +217,7 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length) sctp_disposition_t sctp_sf_do_4_C(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -302,16 +303,14 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net, sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { - struct sctp_chunk *chunk = arg; - struct sctp_chunk *repl; + struct sctp_chunk *chunk = arg, *repl, *err_chunk; + struct sctp_unrecognized_param *unk_param; struct sctp_association *new_asoc; - struct sctp_chunk *err_chunk; struct sctp_packet *packet; - sctp_unrecognized_param_t *unk_param; int len; /* 6.10 Bundling @@ -435,7 +434,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, * construct the parameters in INIT ACK by copying the * ERROR causes over. */ - unk_param = (sctp_unrecognized_param_t *) + unk_param = (struct sctp_unrecognized_param *) ((__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" @@ -498,7 +497,7 @@ nomem: sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -518,7 +517,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the INIT-ACK chunk has a valid length */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_initack_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ @@ -530,7 +529,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { - sctp_error_t error = SCTP_ERROR_NO_RESOURCE; + enum sctp_error error = SCTP_ERROR_NO_RESOURCE; /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes. If there are no causes, @@ -648,7 +647,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; @@ -758,7 +757,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, */ if (chunk->auth_chunk) { struct sctp_chunk auth; - sctp_ierror_t ret; + enum sctp_ierror ret; /* Make sure that we and the peer are AUTH capable */ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { @@ -875,7 +874,7 @@ nomem: sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; @@ -952,7 +951,7 @@ nomem: /* Generate and sendout a heartbeat packet. */ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -978,7 +977,7 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -1026,7 +1025,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net, sctp_disposition_t sctp_sf_send_reconf(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_transport *transport = arg; @@ -1077,7 +1076,7 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net, sctp_disposition_t sctp_sf_beat_8_3(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -1090,7 +1089,8 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t))) + if (!sctp_chunk_length_valid(chunk, + sizeof(struct sctp_heartbeat_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -1098,7 +1098,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, * respond with a HEARTBEAT ACK that contains the Heartbeat * Information field copied from the received HEARTBEAT chunk. */ - chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data; + chunk->subh.hb_hdr = (struct sctp_heartbeathdr *)chunk->skb->data; param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr; paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); @@ -1151,7 +1151,7 @@ nomem: sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -1234,7 +1234,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, union sctp_addr_param *addrparm; struct sctp_errhdr *errhdr; struct sctp_endpoint *ep; - char buffer[sizeof(struct sctp_errhdr)+sizeof(union sctp_addr_param)]; + char buffer[sizeof(*errhdr) + sizeof(*addrparm)]; struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family); /* Build the error on the stack. We are way to malloc crazy @@ -1245,7 +1245,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, /* Copy into a parm format. */ len = af->to_addr_param(ssa, addrparm); - len += sizeof(sctp_errhdr_t); + len += sizeof(*errhdr); errhdr->cause = SCTP_ERROR_RESTART; errhdr->length = htons(len); @@ -1416,16 +1416,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { - sctp_disposition_t retval; - struct sctp_chunk *chunk = arg; - struct sctp_chunk *repl; + struct sctp_chunk *chunk = arg, *repl, *err_chunk; + struct sctp_unrecognized_param *unk_param; struct sctp_association *new_asoc; - struct sctp_chunk *err_chunk; struct sctp_packet *packet; - sctp_unrecognized_param_t *unk_param; + sctp_disposition_t retval; int len; /* 6.10 Bundling @@ -1555,7 +1553,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * construct the parameters in INIT ACK by copying the * ERROR causes over. */ - unk_param = (sctp_unrecognized_param_t *) + unk_param = (struct sctp_unrecognized_param *) ((__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" @@ -1629,7 +1627,7 @@ cleanup: sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -1683,7 +1681,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net, sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -1706,7 +1704,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net, sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { /* Per the above section, we'll discard the chunk if we have an @@ -2029,7 +2027,7 @@ nomem: sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2148,7 +2146,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2167,7 +2165,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2190,7 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2209,7 +2207,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2241,7 +2239,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2268,12 +2266,12 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - sctp_errhdr_t *err; + struct sctp_errhdr *err; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2281,7 +2279,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, /* Make sure that the ERROR chunk has a valid length. * The parameter walking depends on this as well. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -2332,17 +2330,16 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { - struct sctp_chunk *chunk = arg; - u32 stale; - sctp_cookie_preserve_param_t bht; - sctp_errhdr_t *err; - struct sctp_chunk *reply; - struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; + struct sctp_chunk *chunk = arg, *reply; + struct sctp_cookie_preserve_param bht; + struct sctp_bind_addr *bp; + struct sctp_errhdr *err; + u32 stale; if (attempts > asoc->max_init_attempts) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, @@ -2352,7 +2349,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, return SCTP_DISPOSITION_DELETE_TCB; } - err = (sctp_errhdr_t *)(chunk->skb->data); + err = (struct sctp_errhdr *)(chunk->skb->data); /* When calculating the time extension, an implementation * SHOULD use the RTT information measured based on the @@ -2368,7 +2365,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, * to give ample time to retransmit the new cookie and thus * yield a higher probability of success on the reattempt. */ - stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t))); + stale = ntohl(*(__be32 *)((u8 *)err + sizeof(*err))); stale = (stale * 2) / 1000; bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE; @@ -2455,7 +2452,7 @@ nomem: sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2474,7 +2471,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2492,7 +2489,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2503,13 +2500,14 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { + struct sctp_errhdr *err; - sctp_errhdr_t *err; sctp_walk_errors(err, chunk->chunk_hdr); if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, + commands); - error = ((sctp_errhdr_t *)chunk->skb->data)->cause; + error = ((struct sctp_errhdr *)chunk->skb->data)->cause; } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); @@ -2529,7 +2527,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2550,13 +2548,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) - error = ((sctp_errhdr_t *)chunk->skb->data)->cause; + error = ((struct sctp_errhdr *)chunk->skb->data)->cause; return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); @@ -2568,7 +2566,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2583,7 +2581,7 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net, sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2655,13 +2653,13 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - sctp_shutdownhdr_t *sdh; sctp_disposition_t disposition; + struct sctp_shutdownhdr *sdh; struct sctp_ulpevent *ev; __u32 ctsn; @@ -2669,14 +2667,13 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, - sizeof(struct sctp_shutdown_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Convert the elaborate header. */ - sdh = (sctp_shutdownhdr_t *)chunk->skb->data; - skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); + sdh = (struct sctp_shutdownhdr *)chunk->skb->data; + skb_pull(chunk->skb, sizeof(*sdh)); chunk->subh.shutdown_hdr = sdh; ctsn = ntohl(sdh->cum_tsn_ack); @@ -2745,24 +2742,23 @@ out: sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - sctp_shutdownhdr_t *sdh; + struct sctp_shutdownhdr *sdh; __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, - sizeof(struct sctp_shutdown_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - sdh = (sctp_shutdownhdr_t *)chunk->skb->data; + sdh = (struct sctp_shutdownhdr *)chunk->skb->data; ctsn = ntohl(sdh->cum_tsn_ack); if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { @@ -2799,7 +2795,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -2863,23 +2859,23 @@ nomem: sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { - sctp_cwrhdr_t *cwr; struct sctp_chunk *chunk = arg; + struct sctp_cwrhdr *cwr; u32 lowest_tsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - cwr = (sctp_cwrhdr_t *) chunk->skb->data; - skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t)); + cwr = (struct sctp_cwrhdr *)chunk->skb->data; + skb_pull(chunk->skb, sizeof(*cwr)); lowest_tsn = ntohl(cwr->lowest_tsn); @@ -2919,22 +2915,22 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net, sctp_disposition_t sctp_sf_do_ecne(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { - sctp_ecnehdr_t *ecne; struct sctp_chunk *chunk = arg; + struct sctp_ecnehdr *ecne; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - ecne = (sctp_ecnehdr_t *) chunk->skb->data; - skb_pull(chunk->skb, sizeof(sctp_ecnehdr_t)); + ecne = (struct sctp_ecnehdr *)chunk->skb->data; + skb_pull(chunk->skb, sizeof(*ecne)); /* If this is a newer ECNE than the last CWR packet we sent out */ sctp_add_cmd_sf(commands, SCTP_CMD_ECN_ECNE, @@ -2976,7 +2972,7 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net, sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -3096,7 +3092,7 @@ discard_noforce: sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -3187,19 +3183,19 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - sctp_sackhdr_t *sackh; + struct sctp_sackhdr *sackh; __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_sack_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3261,7 +3257,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -3311,18 +3307,18 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, sctp_disposition_t sctp_sf_operr_notify(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - sctp_errhdr_t *err; + struct sctp_errhdr *err; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); sctp_walk_errors(err, chunk->chunk_hdr); @@ -3349,7 +3345,7 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net, sctp_disposition_t sctp_sf_do_9_2_final(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -3432,14 +3428,14 @@ nomem: sctp_disposition_t sctp_sf_ootb(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; struct sk_buff *skb = chunk->skb; struct sctp_chunkhdr *ch; - sctp_errhdr_t *err; + struct sctp_errhdr *err; __u8 *ch_end; int ootb_shut_ack = 0; int ootb_cookie_ack = 0; @@ -3525,7 +3521,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -3587,7 +3583,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -3612,13 +3608,13 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, sctp_disposition_t sctp_sf_do_asconf(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; struct sctp_chunk *asconf_ack = NULL; struct sctp_paramhdr *err_param = NULL; - sctp_addiphdr_t *hdr; + struct sctp_addiphdr *hdr; __u32 serial; if (!sctp_vtag_verify(chunk, asoc)) { @@ -3634,14 +3630,15 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, * described in [I-D.ietf-tsvwg-sctp-auth]. */ if (!net->sctp.addip_noauth && !chunk->auth) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, + commands); /* Make sure that the ASCONF ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - hdr = (sctp_addiphdr_t *)chunk->skb->data; + hdr = (struct sctp_addiphdr *)chunk->skb->data; serial = ntohl(hdr->serial); /* Verify the ASCONF chunk before processing it. */ @@ -3728,14 +3725,15 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, + const union sctp_subtype type, + void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *asconf_ack = arg; struct sctp_chunk *last_asconf = asoc->addip_last_asconf; struct sctp_chunk *abort; struct sctp_paramhdr *err_param = NULL; - sctp_addiphdr_t *addip_hdr; + struct sctp_addiphdr *addip_hdr; __u32 sent_serial, rcvd_serial; if (!sctp_vtag_verify(asconf_ack, asoc)) { @@ -3751,14 +3749,16 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, * described in [I-D.ietf-tsvwg-sctp-auth]. */ if (!net->sctp.addip_noauth && !asconf_ack->auth) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, + commands); /* Make sure that the ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t))) + if (!sctp_chunk_length_valid(asconf_ack, + sizeof(struct sctp_addip_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data; + addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data; rcvd_serial = ntohl(addip_hdr->serial); /* Verify the ASCONF-ACK chunk before processing it. */ @@ -3767,7 +3767,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, (void *)err_param, commands); if (last_asconf) { - addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr; + addip_hdr = (struct sctp_addiphdr *)last_asconf->subh.addip_hdr; sent_serial = ntohl(addip_hdr->serial); } else { sent_serial = asoc->addip_serial - 1; @@ -3782,7 +3782,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) && !(asoc->addip_last_asconf)) { abort = sctp_make_abort(asoc, asconf_ack, - sizeof(sctp_errhdr_t)); + sizeof(struct sctp_errhdr)); if (abort) { sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, 0); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -3818,7 +3818,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, } abort = sctp_make_abort(asoc, asconf_ack, - sizeof(sctp_errhdr_t)); + sizeof(struct sctp_errhdr)); if (abort) { sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -3844,7 +3844,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, sctp_disposition_t sctp_sf_do_reconf(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_paramhdr *err_param = NULL; @@ -3920,7 +3920,7 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net, sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -3991,7 +3991,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4079,11 +4079,12 @@ gen_shutdown: * * The return value is the disposition of the chunk. */ -static sctp_ierror_t sctp_sf_authenticate(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - struct sctp_chunk *chunk) +static enum sctp_ierror sctp_sf_authenticate( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_chunk *chunk) { struct sctp_authhdr *auth_hdr; struct sctp_hmac *hmac; @@ -4095,7 +4096,7 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net, /* Pull in the auth header, so we can do some more verification */ auth_hdr = (struct sctp_authhdr *)chunk->skb->data; chunk->subh.auth_hdr = auth_hdr; - skb_pull(chunk->skb, sizeof(struct sctp_authhdr)); + skb_pull(chunk->skb, sizeof(*auth_hdr)); /* Make sure that we support the HMAC algorithm from the auth * chunk. @@ -4114,7 +4115,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net, /* Make sure that the length of the signature matches what * we expect. */ - sig_len = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_auth_chunk_t); + sig_len = ntohs(chunk->chunk_hdr->length) - + sizeof(struct sctp_auth_chunk); hmac = sctp_auth_get_hmac(ntohs(auth_hdr->hmac_id)); if (sig_len != hmac->hmac_len) return SCTP_IERROR_PROTO_VIOLATION; @@ -4136,8 +4138,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net, memset(digest, 0, sig_len); sctp_auth_calculate_hmac(asoc, chunk->skb, - (struct sctp_auth_chunk *)chunk->chunk_hdr, - GFP_ATOMIC); + (struct sctp_auth_chunk *)chunk->chunk_hdr, + GFP_ATOMIC); /* Discard the packet if the digests do not match */ if (memcmp(save_digest, digest, sig_len)) { @@ -4156,14 +4158,14 @@ nomem: sctp_disposition_t sctp_sf_eat_auth(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { - struct sctp_authhdr *auth_hdr; struct sctp_chunk *chunk = arg; + struct sctp_authhdr *auth_hdr; struct sctp_chunk *err_chunk; - sctp_ierror_t error; + enum sctp_ierror error; /* Make sure that the peer has AUTH capable */ if (!asoc->peer.auth_capable) @@ -4253,7 +4255,7 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net, sctp_disposition_t sctp_sf_unk_chunk(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4333,7 +4335,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, sctp_disposition_t sctp_sf_discard_chunk(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4373,7 +4375,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net, sctp_disposition_t sctp_sf_pdiscard(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4401,7 +4403,7 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net, sctp_disposition_t sctp_sf_violation(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4454,11 +4456,10 @@ static sctp_disposition_t sctp_sf_abort_violation( /* Treat INIT-ACK as a special case during COOKIE-WAIT. */ if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK && !asoc->peer.i.init_tag) { - sctp_initack_chunk_t *initack; + struct sctp_initack_chunk *initack; - initack = (sctp_initack_chunk_t *)chunk->chunk_hdr; - if (!sctp_chunk_length_valid(chunk, - sizeof(sctp_initack_chunk_t))) + initack = (struct sctp_initack_chunk *)chunk->chunk_hdr; + if (!sctp_chunk_length_valid(chunk, sizeof(*initack))) abort->chunk_hdr->flags |= SCTP_CHUNK_FLAG_T; else { unsigned int inittag; @@ -4521,7 +4522,7 @@ nomem: * Handle a protocol violation when the chunk length is invalid. * "Invalid" length is identified as smaller than the minimal length a * given chunk can be. For example, a SACK chunk has invalid length - * if its length is set to be smaller than the size of sctp_sack_chunk_t. + * if its length is set to be smaller than the size of struct sctp_sack_chunk. * * We inform the other end by sending an ABORT with a Protocol Violation * error code. @@ -4540,7 +4541,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4560,7 +4561,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, void *ext, sctp_cmd_seq_t *commands) { @@ -4603,7 +4604,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4623,7 +4624,7 @@ static sctp_disposition_t sctp_sf_violation_chunk( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4698,7 +4699,7 @@ static sctp_disposition_t sctp_sf_violation_chunk( sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4810,7 +4811,7 @@ nomem: sctp_disposition_t sctp_sf_do_prm_send(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4850,7 +4851,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4906,7 +4907,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4943,7 +4944,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( sctp_disposition_t sctp_sf_error_closed(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4957,7 +4958,7 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net, sctp_disposition_t sctp_sf_error_shutdown(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -4984,7 +4985,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5019,7 +5020,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { /* There is a single T1 timer, so we should be able to use @@ -5046,7 +5047,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5095,7 +5096,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5121,7 +5122,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5148,7 +5149,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5179,7 +5180,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5215,7 +5216,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5247,7 +5248,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5264,7 +5265,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net, sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; @@ -5282,7 +5283,7 @@ sctp_disposition_t sctp_sf_ignore_primitive( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5306,7 +5307,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5338,7 +5339,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5408,7 +5409,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5424,12 +5425,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( */ if (chunk) { if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, + commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + if (!sctp_chunk_length_valid( + chunk, sizeof(struct sctp_shutdown_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, + arg, commands); } /* If it has no more outstanding DATA chunks, the SHUTDOWN receiver @@ -5479,7 +5482,7 @@ nomem: sctp_disposition_t sctp_sf_ignore_other(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5507,7 +5510,7 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net, sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5595,7 +5598,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net, sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5626,7 +5629,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net, sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5690,7 +5693,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5740,7 +5743,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5811,7 +5814,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5882,7 +5885,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire( sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5919,7 +5922,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5961,7 +5964,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( sctp_disposition_t sctp_sf_not_impl(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -5979,7 +5982,7 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net, sctp_disposition_t sctp_sf_bug(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -6000,7 +6003,7 @@ sctp_disposition_t sctp_sf_bug(struct net *net, sctp_disposition_t sctp_sf_timer_ignore(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *commands) { @@ -6107,9 +6110,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT_ACK: { - sctp_initack_chunk_t *initack; + struct sctp_initack_chunk *initack; - initack = (sctp_initack_chunk_t *)chunk->chunk_hdr; + initack = (struct sctp_initack_chunk *)chunk->chunk_hdr; vtag = ntohl(initack->init_hdr.init_tag); break; } diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 3e958c1c4b95..d437f3801399 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -52,9 +52,10 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES]; static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES]; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, - enum sctp_cid cid, - sctp_state_t state); +static const sctp_sm_table_entry_t *sctp_chunk_event_lookup( + struct net *net, + enum sctp_cid cid, + enum sctp_state state); static const sctp_sm_table_entry_t bug = { @@ -76,10 +77,11 @@ static const sctp_sm_table_entry_t bug = { rtn; \ }) -const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, - sctp_event_t event_type, - sctp_state_t state, - sctp_subtype_t event_subtype) +const sctp_sm_table_entry_t *sctp_sm_lookup_event( + struct net *net, + enum sctp_event event_type, + enum sctp_state state, + union sctp_subtype event_subtype) { switch (event_type) { case SCTP_EVENT_T_CHUNK: @@ -967,9 +969,10 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE, }; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, - enum sctp_cid cid, - sctp_state_t state) +static const sctp_sm_table_entry_t *sctp_chunk_event_lookup( + struct net *net, + enum sctp_cid cid, + enum sctp_state state) { if (state > SCTP_STATE_MAX) return &bug; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1db478e34520..a1e2113806dd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1055,7 +1055,7 @@ static int __sctp_connect(struct sock *sk, struct sctp_association *asoc2; struct sctp_transport *transport; union sctp_addr to; - sctp_scope_t scope; + enum sctp_scope scope; long timeo; int err = 0; int addrcnt = 0; @@ -1610,7 +1610,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) struct sctp_initmsg *sinit; sctp_assoc_t associd = 0; sctp_cmsgs_t cmsgs = { NULL }; - sctp_scope_t scope; + enum sctp_scope scope; bool fill_sinfo_ttl = false, wait_connect = false; struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 0e732f68c2bf..ef7ca44d6e6a 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -46,7 +46,7 @@ static int timer_max = 86400000; /* ms in one day */ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; -static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ +static int addr_scope_max = SCTP_SCOPE_POLICY_MAX; static int rwnd_scale_max = 16; static int rto_alpha_min = 0; static int rto_beta_min = 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 80a97c8501a7..2d9bd3776bc8 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -490,7 +490,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, * detected. */ void sctp_transport_lower_cwnd(struct sctp_transport *transport, - sctp_lower_cwnd_t reason) + enum sctp_lower_cwnd reason) { struct sctp_association *asoc = transport->asoc; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 5f86c5062a98..67abc0194f30 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -371,19 +371,19 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, struct sctp_chunk *chunk, __u16 flags, gfp_t gfp) { - struct sctp_ulpevent *event; struct sctp_remote_error *sre; + struct sctp_ulpevent *event; + struct sctp_errhdr *ch; struct sk_buff *skb; - sctp_errhdr_t *ch; __be16 cause; int elen; - ch = (sctp_errhdr_t *)(chunk->skb->data); + ch = (struct sctp_errhdr *)(chunk->skb->data); cause = ch->cause; - elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t); + elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(*ch); /* Pull off the ERROR header. */ - skb_pull(chunk->skb, sizeof(sctp_errhdr_t)); + skb_pull(chunk->skb, sizeof(*ch)); /* Copy the skb to a new skb with room for us to prepend * notification with. diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 33954852f3f8..c717ef0896aa 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -8,10 +8,6 @@ config SMC The Linux implementation of the SMC-R solution is designed as a separate socket family SMC. - Warning: SMC will expose all memory for remote reads and writes - once a connection is established. Don't enable this option except - for tightly controlled lab environment. - Select this option if you want to run SMC socket applications config SMC_DIAG diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6793d7348cc8..8c6d24b2995d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -338,6 +338,12 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) return SMC_CLC_DECL_INTERR; smc_wr_remember_qp_attr(link); + + rc = smc_wr_reg_send(link, + smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) + return SMC_CLC_DECL_INTERR; + /* send CONFIRM LINK response over RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], @@ -430,12 +436,8 @@ static int smc_connect_rdma(struct smc_sock *smc) smc_conn_save_peer_info(smc, &aclc); - rc = smc_sndbuf_create(smc); - if (rc) { - reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma_unlock; - } - rc = smc_rmb_create(smc); + /* create send buffer and rmb */ + rc = smc_buf_create(smc); if (rc) { reason_code = SMC_CLC_DECL_MEM; goto decline_rdma_unlock; @@ -459,7 +461,20 @@ static int smc_connect_rdma(struct smc_sock *smc) reason_code = SMC_CLC_DECL_INTERR; goto decline_rdma_unlock; } + } else { + struct smc_buf_desc *buf_desc = smc->conn.rmb_desc; + + if (!buf_desc->reused) { + /* register memory region for new rmb */ + rc = smc_wr_reg_send(link, + buf_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) { + reason_code = SMC_CLC_DECL_INTERR; + goto decline_rdma_unlock; + } + } } + smc_rmb_sync_sg_for_device(&smc->conn); rc = smc_clc_send_confirm(smc); if (rc) @@ -692,6 +707,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) int rc; link = &lgr->lnk[SMC_SINGLE_LINK]; + + rc = smc_wr_reg_send(link, + smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) + return SMC_CLC_DECL_INTERR; + /* send CONFIRM LINK request to client over the RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], @@ -779,11 +800,6 @@ static void smc_listen_work(struct work_struct *work) mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, smcibdev, ibport, &pclc.lcl, 0); - if (local_contact == SMC_REUSE_CONTACT) - /* lock no longer needed, free it due to following - * smc_clc_wait_msg() call - */ - mutex_unlock(&smc_create_lgr_pending); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@ -794,12 +810,8 @@ static void smc_listen_work(struct work_struct *work) } link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; - rc = smc_sndbuf_create(new_smc); - if (rc) { - reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma; - } - rc = smc_rmb_create(new_smc); + /* create send buffer and rmb */ + rc = smc_buf_create(new_smc); if (rc) { reason_code = SMC_CLC_DECL_MEM; goto decline_rdma; @@ -808,6 +820,21 @@ static void smc_listen_work(struct work_struct *work) smc_close_init(new_smc); smc_rx_init(new_smc); + if (local_contact != SMC_FIRST_CONTACT) { + struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc; + + if (!buf_desc->reused) { + /* register memory region for new rmb */ + rc = smc_wr_reg_send(link, + buf_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) { + reason_code = SMC_CLC_DECL_INTERR; + goto decline_rdma; + } + } + } + smc_rmb_sync_sg_for_device(&new_smc->conn); + rc = smc_clc_send_accept(new_smc, local_contact); if (rc) goto out_err; @@ -853,8 +880,7 @@ out_connected: if (newsmcsk->sk_state == SMC_INIT) newsmcsk->sk_state = SMC_ACTIVE; enqueue: - if (local_contact == SMC_FIRST_CONTACT) - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_create_lgr_pending); lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { smc_accept_enqueue(&lsmc->sk, newsmcsk); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 03ec058d18df..3934913ab835 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -204,13 +204,13 @@ int smc_clc_send_confirm(struct smc_sock *smc) memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); cclc.rmb_rkey = - htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); cclc.rmbe_size = conn->rmbe_size_short; - cclc.rmb_dma_addr = - cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); + cclc.rmb_dma_addr = cpu_to_be64( + (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); hton24(cclc.psn, link->psn_initial); memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -256,13 +256,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); aclc.rmb_rkey = - htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.qp_mtu = link->path_mtu; aclc.rmbe_size = conn->rmbe_size_short, - aclc.rmb_dma_addr = - cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); + aclc.rmb_dma_addr = cpu_to_be64( + (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); hton24(aclc.psn, link->psn_initial); memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 3ac09a629ea1..1a16d51e2330 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -175,7 +175,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, rc = smc_wr_alloc_link_mem(lnk); if (rc) goto free_lgr; - init_waitqueue_head(&lnk->wr_tx_wait); rc = smc_ib_create_protection_domain(lnk); if (rc) goto free_link_mem; @@ -207,17 +206,14 @@ out: return rc; } -static void smc_sndbuf_unuse(struct smc_connection *conn) +static void smc_buf_unuse(struct smc_connection *conn) { if (conn->sndbuf_desc) { conn->sndbuf_desc->used = 0; conn->sndbuf_size = 0; } -} - -static void smc_rmb_unuse(struct smc_connection *conn) -{ if (conn->rmb_desc) { + conn->rmb_desc->reused = true; conn->rmb_desc->used = 0; conn->rmbe_size = 0; } @@ -232,8 +228,7 @@ void smc_conn_free(struct smc_connection *conn) return; smc_cdc_tx_dismiss_slots(conn); smc_lgr_unregister_conn(conn); - smc_rmb_unuse(conn); - smc_sndbuf_unuse(conn); + smc_buf_unuse(conn); } static void smc_link_clear(struct smc_link *lnk) @@ -246,48 +241,57 @@ static void smc_link_clear(struct smc_link *lnk) smc_wr_free_link_mem(lnk); } -static void smc_lgr_free_sndbufs(struct smc_link_group *lgr) +static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, + bool is_rmb) { - struct smc_buf_desc *sndbuf_desc, *bf_desc; - int i; - - for (i = 0; i < SMC_RMBE_SIZES; i++) { - list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i], - list) { - list_del(&sndbuf_desc->list); - smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - smc_uncompress_bufsize(i), - sndbuf_desc, DMA_TO_DEVICE); - kfree(sndbuf_desc->cpu_addr); - kfree(sndbuf_desc); - } + if (is_rmb) { + if (buf_desc->mr_rx[SMC_SINGLE_LINK]) + smc_ib_put_memory_region( + buf_desc->mr_rx[SMC_SINGLE_LINK]); + smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, + DMA_FROM_DEVICE); + } else { + smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, + DMA_TO_DEVICE); } + sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); + if (buf_desc->cpu_addr) + free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order); + kfree(buf_desc); } -static void smc_lgr_free_rmbs(struct smc_link_group *lgr) +static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) { - struct smc_buf_desc *rmb_desc, *bf_desc; struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + struct smc_buf_desc *buf_desc, *bf_desc; + struct list_head *buf_list; int i; for (i = 0; i < SMC_RMBE_SIZES; i++) { - list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i], + if (is_rmb) + buf_list = &lgr->rmbs[i]; + else + buf_list = &lgr->sndbufs[i]; + list_for_each_entry_safe(buf_desc, bf_desc, buf_list, list) { - list_del(&rmb_desc->list); - smc_ib_buf_unmap(lnk->smcibdev, - smc_uncompress_bufsize(i), - rmb_desc, DMA_FROM_DEVICE); - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); + list_del(&buf_desc->list); + smc_buf_free(buf_desc, lnk, is_rmb); } } } +static void smc_lgr_free_bufs(struct smc_link_group *lgr) +{ + /* free send buffers */ + __smc_lgr_free_bufs(lgr, false); + /* free rmbs */ + __smc_lgr_free_bufs(lgr, true); +} + /* remove a link group */ void smc_lgr_free(struct smc_link_group *lgr) { - smc_lgr_free_rmbs(lgr); - smc_lgr_free_sndbufs(lgr); + smc_lgr_free_bufs(lgr); smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); kfree(lgr); } @@ -452,45 +456,25 @@ out: return rc ? rc : local_contact; } -/* try to reuse a sndbuf description slot of the sndbufs list for a certain - * buf_size; if not available, return NULL +/* try to reuse a sndbuf or rmb description slot for a certain + * buffer size; if not available, return NULL */ static inline -struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr, - int compressed_bufsize) +struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr, + int compressed_bufsize, + rwlock_t *lock, + struct list_head *buf_list) { - struct smc_buf_desc *sndbuf_slot; - - read_lock_bh(&lgr->sndbufs_lock); - list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize], - list) { - if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) { - read_unlock_bh(&lgr->sndbufs_lock); - return sndbuf_slot; - } - } - read_unlock_bh(&lgr->sndbufs_lock); - return NULL; -} + struct smc_buf_desc *buf_slot; -/* try to reuse an rmb description slot of the rmbs list for a certain - * rmbe_size; if not available, return NULL - */ -static inline -struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr, - int compressed_bufsize) -{ - struct smc_buf_desc *rmb_slot; - - read_lock_bh(&lgr->rmbs_lock); - list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize], - list) { - if (cmpxchg(&rmb_slot->used, 0, 1) == 0) { - read_unlock_bh(&lgr->rmbs_lock); - return rmb_slot; + read_lock_bh(lock); + list_for_each_entry(buf_slot, buf_list, list) { + if (cmpxchg(&buf_slot->used, 0, 1) == 0) { + read_unlock_bh(lock); + return buf_slot; } } - read_unlock_bh(&lgr->rmbs_lock); + read_unlock_bh(lock); return NULL; } @@ -503,136 +487,186 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size) return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } -/* create the tx buffer for an SMC socket */ -int smc_sndbuf_create(struct smc_sock *smc) +static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, + bool is_rmb, int bufsize) { - struct smc_connection *conn = &smc->conn; - struct smc_link_group *lgr = conn->lgr; - int tmp_bufsize, tmp_bufsize_short; - struct smc_buf_desc *sndbuf_desc; + struct smc_buf_desc *buf_desc; + struct smc_link *lnk; int rc; - /* use socket send buffer size (w/o overhead) as start value */ - for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); - tmp_bufsize_short >= 0; tmp_bufsize_short--) { - tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short); - /* check for reusable sndbuf_slot in the link group */ - sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short); - if (sndbuf_desc) { - memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize); - break; /* found reusable slot */ - } - /* try to alloc a new send buffer */ - sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL); - if (!sndbuf_desc) - break; /* give up with -ENOMEM */ - sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize, - GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | - __GFP_NORETRY); - if (!sndbuf_desc->cpu_addr) { - kfree(sndbuf_desc); - sndbuf_desc = NULL; - /* if send buffer allocation has failed, - * try a smaller one - */ - continue; - } - rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, sndbuf_desc, - DMA_TO_DEVICE); + /* try to alloc a new buffer */ + buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); + if (!buf_desc) + return ERR_PTR(-ENOMEM); + + buf_desc->cpu_addr = + (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | + __GFP_NOMEMALLOC | + __GFP_NORETRY | __GFP_ZERO, + get_order(bufsize)); + if (!buf_desc->cpu_addr) { + kfree(buf_desc); + return ERR_PTR(-EAGAIN); + } + buf_desc->order = get_order(bufsize); + + /* build the sg table from the pages */ + lnk = &lgr->lnk[SMC_SINGLE_LINK]; + rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, + GFP_KERNEL); + if (rc) { + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(rc); + } + sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, + buf_desc->cpu_addr, bufsize); + + /* map sg table to DMA address */ + rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, + is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); + /* SMC protocol depends on mapping to one DMA address only */ + if (rc != 1) { + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(-EAGAIN); + } + + /* create a new memory region for the RMB */ + if (is_rmb) { + rc = smc_ib_get_memory_region(lnk->roce_pd, + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_LOCAL_WRITE, + buf_desc); if (rc) { - kfree(sndbuf_desc->cpu_addr); - kfree(sndbuf_desc); - sndbuf_desc = NULL; - continue; /* if mapping failed, try smaller one */ + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(rc); } - sndbuf_desc->used = 1; - write_lock_bh(&lgr->sndbufs_lock); - list_add(&sndbuf_desc->list, - &lgr->sndbufs[tmp_bufsize_short]); - write_unlock_bh(&lgr->sndbufs_lock); - break; - } - if (sndbuf_desc && sndbuf_desc->cpu_addr) { - conn->sndbuf_desc = sndbuf_desc; - conn->sndbuf_size = tmp_bufsize; - smc->sk.sk_sndbuf = tmp_bufsize * 2; - atomic_set(&conn->sndbuf_space, tmp_bufsize); - return 0; - } else { - return -ENOMEM; } + + return buf_desc; } -/* create the RMB for an SMC socket (even though the SMC protocol - * allows more than one RMB-element per RMB, the Linux implementation - * uses just one RMB-element per RMB, i.e. uses an extra RMB for every - * connection in a link group - */ -int smc_rmb_create(struct smc_sock *smc) +static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) { struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; - int tmp_bufsize, tmp_bufsize_short; - struct smc_buf_desc *rmb_desc; - int rc; + struct smc_buf_desc *buf_desc = NULL; + struct list_head *buf_list; + int bufsize, bufsize_short; + int sk_buf_size; + rwlock_t *lock; + + if (is_rmb) + /* use socket recv buffer size (w/o overhead) as start value */ + sk_buf_size = smc->sk.sk_rcvbuf / 2; + else + /* use socket send buffer size (w/o overhead) as start value */ + sk_buf_size = smc->sk.sk_sndbuf / 2; + + for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); + bufsize_short >= 0; bufsize_short--) { + + if (is_rmb) { + lock = &lgr->rmbs_lock; + buf_list = &lgr->rmbs[bufsize_short]; + } else { + lock = &lgr->sndbufs_lock; + buf_list = &lgr->sndbufs[bufsize_short]; + } + bufsize = smc_uncompress_bufsize(bufsize_short); + if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) + continue; - /* use socket recv buffer size (w/o overhead) as start value */ - for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2); - tmp_bufsize_short >= 0; tmp_bufsize_short--) { - tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short); - /* check for reusable rmb_slot in the link group */ - rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short); - if (rmb_desc) { - memset(rmb_desc->cpu_addr, 0, tmp_bufsize); + /* check for reusable slot in the link group */ + buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list); + if (buf_desc) { + memset(buf_desc->cpu_addr, 0, bufsize); break; /* found reusable slot */ } - /* try to alloc a new RMB */ - rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL); - if (!rmb_desc) - break; /* give up with -ENOMEM */ - rmb_desc->cpu_addr = kzalloc(tmp_bufsize, - GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | - __GFP_NORETRY); - if (!rmb_desc->cpu_addr) { - kfree(rmb_desc); - rmb_desc = NULL; - /* if RMB allocation has failed, - * try a smaller one - */ + + buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); + if (PTR_ERR(buf_desc) == -ENOMEM) + break; + if (IS_ERR(buf_desc)) continue; - } - rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, rmb_desc, - DMA_FROM_DEVICE); - if (rc) { - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); - rmb_desc = NULL; - continue; /* if mapping failed, try smaller one */ - } - rmb_desc->rkey[SMC_SINGLE_LINK] = - lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey; - rmb_desc->used = 1; - write_lock_bh(&lgr->rmbs_lock); - list_add(&rmb_desc->list, - &lgr->rmbs[tmp_bufsize_short]); - write_unlock_bh(&lgr->rmbs_lock); - break; + + buf_desc->used = 1; + write_lock_bh(lock); + list_add(&buf_desc->list, buf_list); + write_unlock_bh(lock); + break; /* found */ } - if (rmb_desc && rmb_desc->cpu_addr) { - conn->rmb_desc = rmb_desc; - conn->rmbe_size = tmp_bufsize; - conn->rmbe_size_short = tmp_bufsize_short; - smc->sk.sk_rcvbuf = tmp_bufsize * 2; + + if (IS_ERR(buf_desc)) + return -ENOMEM; + + if (is_rmb) { + conn->rmb_desc = buf_desc; + conn->rmbe_size = bufsize; + conn->rmbe_size_short = bufsize_short; + smc->sk.sk_rcvbuf = bufsize * 2; atomic_set(&conn->bytes_to_rcv, 0); - conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize); - return 0; + conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); } else { - return -ENOMEM; + conn->sndbuf_desc = buf_desc; + conn->sndbuf_size = bufsize; + smc->sk.sk_sndbuf = bufsize * 2; + atomic_set(&conn->sndbuf_space, bufsize); } + return 0; +} + +void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->sndbuf_desc, DMA_TO_DEVICE); +} + +void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->sndbuf_desc, DMA_TO_DEVICE); +} + +void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->rmb_desc, DMA_FROM_DEVICE); +} + +void smc_rmb_sync_sg_for_device(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->rmb_desc, DMA_FROM_DEVICE); +} + +/* create the send and receive buffer for an SMC socket; + * receive buffers are called RMBs; + * (even though the SMC protocol allows more than one RMB-element per RMB, + * the Linux implementation uses just one RMB-element per RMB, i.e. uses an + * extra RMB for every connection in a link group + */ +int smc_buf_create(struct smc_sock *smc) +{ + int rc; + + /* create send buffer */ + rc = __smc_buf_create(smc, false); + if (rc) + return rc; + /* create rmb */ + rc = __smc_buf_create(smc, true); + if (rc) + smc_buf_free(smc->conn.sndbuf_desc, + &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false); + return rc; } static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b013cb43a327..19c44bf4e391 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -37,6 +37,14 @@ struct smc_wr_buf { u8 raw[SMC_WR_BUF_SIZE]; }; +#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */ + +enum smc_wr_reg_state { + POSTED, /* ib_wr_reg_mr request posted */ + CONFIRMED, /* ib_wr_reg_mr response: successful */ + FAILED /* ib_wr_reg_mr response: failure */ +}; + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -65,6 +73,10 @@ struct smc_link { u64 wr_rx_id; /* seq # of last recv WR */ u32 wr_rx_cnt; /* number of WR recv buffers */ + struct ib_reg_wr wr_reg; /* WR register memory region */ + wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ + enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ + union ib_gid gid; /* gid matching used vlan id */ u32 peer_qpn; /* QP number of peer */ enum ib_mtu path_mtu; /* used mtu */ @@ -90,14 +102,15 @@ struct smc_link { /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */ struct smc_buf_desc { struct list_head list; - u64 dma_addr[SMC_LINKS_PER_LGR_MAX]; - /* mapped address of buffer */ void *cpu_addr; /* virtual address of buffer */ - u32 rkey[SMC_LINKS_PER_LGR_MAX]; - /* for rmb only: - * rkey provided to peer + struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ + struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + /* for rmb only: memory region + * incl. rkey provided to peer */ + u32 order; /* allocation order */ u32 used; /* currently used / unused */ + bool reused; /* new created / reused */ }; struct smc_rtoken { /* address/key of remote RMB */ @@ -173,9 +186,11 @@ struct smc_clc_msg_accept_confirm; void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); -int smc_sndbuf_create(struct smc_sock *smc); -int smc_rmb_create(struct smc_sock *smc); +int smc_buf_create(struct smc_sock *smc); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); - +void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); +void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); +void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); +void smc_rmb_sync_sg_for_device(struct smc_connection *conn); #endif diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index b31715505a35..547e0e113b17 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -13,6 +13,7 @@ #include <linux/random.h> #include <linux/workqueue.h> +#include <linux/scatterlist.h> #include <rdma/ib_verbs.h> #include "smc_pnet.h" @@ -192,8 +193,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) { int rc; - lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, - IB_PD_UNSAFE_GLOBAL_RKEY); + lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); rc = PTR_ERR_OR_ZERO(lnk->roce_pd); if (IS_ERR(lnk->roce_pd)) lnk->roce_pd = NULL; @@ -232,10 +232,10 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .recv_cq = lnk->smcibdev->roce_cq_recv, .srq = NULL, .cap = { - .max_send_wr = SMC_WR_BUF_CNT, /* include unsolicited rdma_writes as well, * there are max. 2 RDMA_WRITE per 1 WR_SEND */ + .max_send_wr = SMC_WR_BUF_CNT * 3, .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = 1, @@ -254,33 +254,117 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) return rc; } -/* map a new TX or RX buffer to DMA */ -int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size, - struct smc_buf_desc *buf_slot, - enum dma_data_direction data_direction) +void smc_ib_put_memory_region(struct ib_mr *mr) { - int rc = 0; + ib_dereg_mr(mr); +} - if (buf_slot->dma_addr[SMC_SINGLE_LINK]) - return rc; /* already mapped */ - buf_slot->dma_addr[SMC_SINGLE_LINK] = - ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr, - buf_size, data_direction); - if (ib_dma_mapping_error(smcibdev->ibdev, - buf_slot->dma_addr[SMC_SINGLE_LINK])) - rc = -EIO; - return rc; +static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot) +{ + unsigned int offset = 0; + int sg_num; + + /* map the largest prefix of a dma mapped SG list */ + sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK], + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + &offset, PAGE_SIZE); + + return sg_num; +} + +/* Allocate a memory region and map the dma mapped SG list of buf_slot */ +int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, + struct smc_buf_desc *buf_slot) +{ + if (buf_slot->mr_rx[SMC_SINGLE_LINK]) + return 0; /* already done */ + + buf_slot->mr_rx[SMC_SINGLE_LINK] = + ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order); + if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) { + int rc; + + rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]); + buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL; + return rc; + } + + if (smc_ib_map_mr_sg(buf_slot) != 1) + return -EINVAL; + + return 0; } -void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int buf_size, +/* synchronize buffer usage for cpu access */ +void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + struct scatterlist *sg; + unsigned int i; + + /* for now there is just one DMA address */ + for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg, + buf_slot->sgt[SMC_SINGLE_LINK].nents, i) { + if (!sg_dma_len(sg)) + break; + ib_dma_sync_single_for_cpu(smcibdev->ibdev, + sg_dma_address(sg), + sg_dma_len(sg), + data_direction); + } +} + +/* synchronize buffer usage for device access */ +void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + struct scatterlist *sg; + unsigned int i; + + /* for now there is just one DMA address */ + for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg, + buf_slot->sgt[SMC_SINGLE_LINK].nents, i) { + if (!sg_dma_len(sg)) + break; + ib_dma_sync_single_for_device(smcibdev->ibdev, + sg_dma_address(sg), + sg_dma_len(sg), + data_direction); + } +} + +/* Map a new TX or RX buffer SG-table to DMA */ +int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { - if (!buf_slot->dma_addr[SMC_SINGLE_LINK]) + int mapped_nents; + + mapped_nents = ib_dma_map_sg(smcibdev->ibdev, + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + data_direction); + if (!mapped_nents) + return -ENOMEM; + + return mapped_nents; +} + +void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address) return; /* already unmapped */ - ib_dma_unmap_single(smcibdev->ibdev, *buf_slot->dma_addr, buf_size, - data_direction); - buf_slot->dma_addr[SMC_SINGLE_LINK] = 0; + + ib_dma_unmap_sg(smcibdev->ibdev, + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + data_direction); + buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0; } static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index b567152a526d..9b927a33d5e6 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -51,12 +51,12 @@ int smc_ib_register_client(void) __init; void smc_ib_unregister_client(void); bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport); -int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size, - struct smc_buf_desc *buf_slot, - enum dma_data_direction data_direction); -void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int bufsize, +int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); +void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); void smc_ib_dealloc_protection_domain(struct smc_link *lnk); int smc_ib_create_protection_domain(struct smc_link *lnk); void smc_ib_destroy_queue_pair(struct smc_link *lnk); @@ -65,6 +65,13 @@ int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); - - +int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, + struct smc_buf_desc *buf_slot); +void smc_ib_put_memory_region(struct ib_mr *mr); +void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); +void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); #endif diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index f0c8b089f770..b17a333e9bb0 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -170,6 +170,7 @@ copy: copylen, conn->rmbe_size - cons.count); chunk_len_sum = chunk_len; chunk_off = cons.count; + smc_rmb_sync_sg_for_cpu(conn); for (chunk = 0; chunk < 2; chunk++) { if (!(flags & MSG_TRUNC)) { rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off, @@ -177,6 +178,7 @@ copy: if (rc) { if (!read_done) read_done = -EFAULT; + smc_rmb_sync_sg_for_device(conn); goto out; } } @@ -190,6 +192,7 @@ copy: chunk_len_sum += chunk_len; chunk_off = 0; /* modulo offset in recv ring buffer */ } + smc_rmb_sync_sg_for_device(conn); /* update cursors */ if (!(flags & MSG_PEEK)) { diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 21ec1832ab51..3c656beb8820 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -174,10 +174,12 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) copylen, conn->sndbuf_size - tx_cnt_prep); chunk_len_sum = chunk_len; chunk_off = tx_cnt_prep; + smc_sndbuf_sync_sg_for_cpu(conn); for (chunk = 0; chunk < 2; chunk++) { rc = memcpy_from_msg(sndbuf_base + chunk_off, msg, chunk_len); if (rc) { + smc_sndbuf_sync_sg_for_device(conn); if (send_done) return send_done; goto out_err; @@ -192,6 +194,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) chunk_len_sum += chunk_len; chunk_off = 0; /* modulo offset in send ring buffer */ } + smc_sndbuf_sync_sg_for_device(conn); /* update cursors */ smc_curs_add(conn->sndbuf_size, &prep, copylen); smc_curs_write(&conn->tx_curs_prep, @@ -277,6 +280,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) struct smc_link_group *lgr = conn->lgr; int to_send, rmbespace; struct smc_link *link; + dma_addr_t dma_addr; int num_sges; int rc; @@ -334,12 +338,11 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) src_len = conn->sndbuf_size - sent.count; } src_len_sum = src_len; + dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); for (dstchunk = 0; dstchunk < 2; dstchunk++) { num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = - conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] + - src_off; + sges[srcchunk].addr = dma_addr + src_off; sges[srcchunk].length = src_len; sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; num_sges++; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 874ee9f9d796..ab56bda66783 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -68,6 +68,16 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) int i; link = wc->qp->qp_context; + + if (wc->opcode == IB_WC_REG_MR) { + if (wc->status) + link->wr_reg_state = FAILED; + else + link->wr_reg_state = CONFIRMED; + wake_up(&link->wr_reg_wait); + return; + } + pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); if (pnd_snd_idx == link->wr_tx_cnt) return; @@ -243,6 +253,52 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) return rc; } +/* Register a memory region and wait for result. */ +int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) +{ + struct ib_send_wr *failed_wr = NULL; + int rc; + + ib_req_notify_cq(link->smcibdev->roce_cq_send, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + link->wr_reg_state = POSTED; + link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr; + link->wr_reg.mr = mr; + link->wr_reg.key = mr->rkey; + failed_wr = &link->wr_reg.wr; + rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr); + WARN_ON(failed_wr != &link->wr_reg.wr); + if (rc) + return rc; + + rc = wait_event_interruptible_timeout(link->wr_reg_wait, + (link->wr_reg_state != POSTED), + SMC_WR_REG_MR_WAIT_TIME); + if (!rc) { + /* timeout - terminate connections */ + struct smc_link_group *lgr; + + lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + smc_lgr_terminate(lgr); + return -EPIPE; + } + if (rc == -ERESTARTSYS) + return -EINTR; + switch (link->wr_reg_state) { + case CONFIRMED: + rc = 0; + break; + case FAILED: + rc = -EIO; + break; + case POSTED: + rc = -EPIPE; + break; + } + return rc; +} + void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, @@ -458,6 +514,11 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; lnk->wr_rx_ibs[i].num_sge = 1; } + lnk->wr_reg.wr.next = NULL; + lnk->wr_reg.wr.num_sge = 0; + lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED; + lnk->wr_reg.wr.opcode = IB_WR_REG_MR; + lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; } void smc_wr_free_link(struct smc_link *lnk) @@ -602,6 +663,8 @@ int smc_wr_create_link(struct smc_link *lnk) smc_wr_init_sge(lnk); memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); + init_waitqueue_head(&lnk->wr_tx_wait); + init_waitqueue_head(&lnk->wr_reg_wait); return rc; dma_unmap: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 0b9beeda6053..45eb53833052 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -102,5 +102,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context); +int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr); #endif /* SMC_WR_H */ diff --git a/net/socket.c b/net/socket.c index ad22df1ffbd1..b332d1e8e4e4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -652,6 +652,20 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size) +{ + struct socket *sock = sk->sk_socket; + + if (!sock->ops->sendmsg_locked) + sock_no_sendmsg_locked(sk, msg, size); + + iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); + + return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg)); +} +EXPORT_SYMBOL(kernel_sendmsg_locked); + static bool skb_is_err_queue(const struct sk_buff *skb) { /* pkt_type of skbs enqueued on the error queue are set to @@ -3376,6 +3390,19 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage); +int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags) +{ + struct socket *sock = sk->sk_socket; + + if (sock->ops->sendpage_locked) + return sock->ops->sendpage_locked(sk, page, offset, size, + flags); + + return sock_no_sendpage_locked(sk, page, offset, size, flags); +} +EXPORT_SYMBOL(kernel_sendpage_locked); + int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) { mm_segment_t oldfs = get_fs(); @@ -3405,7 +3432,6 @@ u32 kernel_sock_ip_overhead(struct sock *sk) struct inet_sock *inet; struct ip_options_rcu *opt; u32 overhead = 0; - bool owned_by_user; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np; struct ipv6_txoptions *optv6 = NULL; @@ -3414,13 +3440,12 @@ u32 kernel_sock_ip_overhead(struct sock *sk) if (!sk) return overhead; - owned_by_user = sock_owned_by_user(sk); switch (sk->sk_family) { case AF_INET: inet = inet_sk(sk); overhead += sizeof(struct iphdr); opt = rcu_dereference_protected(inet->inet_opt, - owned_by_user); + sock_owned_by_user(sk)); if (opt) overhead += opt->opt.optlen; return overhead; @@ -3430,7 +3455,7 @@ u32 kernel_sock_ip_overhead(struct sock *sk) overhead += sizeof(struct ipv6hdr); if (np) optv6 = rcu_dereference_protected(np->opt, - owned_by_user); + sock_owned_by_user(sk)); if (optv6) overhead += (optv6->opt_flen + optv6->opt_nflen); return overhead; diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index b5c279b22680..0d18fbc6f870 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -29,44 +29,46 @@ static struct workqueue_struct *strp_wq; -struct _strp_rx_msg { - /* Internal cb structure. struct strp_rx_msg must be first for passing +struct _strp_msg { + /* Internal cb structure. struct strp_msg must be first for passing * to upper layer. */ - struct strp_rx_msg strp; + struct strp_msg strp; int accum_len; int early_eaten; }; -static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb) +static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) { - return (struct _strp_rx_msg *)((void *)skb->cb + + return (struct _strp_msg *)((void *)skb->cb + offsetof(struct qdisc_skb_cb, data)); } /* Lower lock held */ -static void strp_abort_rx_strp(struct strparser *strp, int err) +static void strp_abort_strp(struct strparser *strp, int err) { - struct sock *csk = strp->sk; - /* Unrecoverable error in receive */ - del_timer(&strp->rx_msg_timer); + del_timer(&strp->msg_timer); - if (strp->rx_stopped) + if (strp->stopped) return; - strp->rx_stopped = 1; + strp->stopped = 1; + + if (strp->sk) { + struct sock *sk = strp->sk; - /* Report an error on the lower socket */ - csk->sk_err = err; - csk->sk_error_report(csk); + /* Report an error on the lower socket */ + sk->sk_err = err; + sk->sk_error_report(sk); + } } -static void strp_start_rx_timer(struct strparser *strp) +static void strp_start_timer(struct strparser *strp, long timeo) { - if (strp->sk->sk_rcvtimeo) - mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo); + if (timeo) + mod_timer(&strp->msg_timer, timeo); } /* Lower lock held */ @@ -74,46 +76,55 @@ static void strp_parser_err(struct strparser *strp, int err, read_descriptor_t *desc) { desc->error = err; - kfree_skb(strp->rx_skb_head); - strp->rx_skb_head = NULL; + kfree_skb(strp->skb_head); + strp->skb_head = NULL; strp->cb.abort_parser(strp, err); } static inline int strp_peek_len(struct strparser *strp) { - struct socket *sock = strp->sk->sk_socket; + if (strp->sk) { + struct socket *sock = strp->sk->sk_socket; + + return sock->ops->peek_len(sock); + } + + /* If we don't have an associated socket there's nothing to peek. + * Return int max to avoid stopping the strparser. + */ - return sock->ops->peek_len(sock); + return INT_MAX; } /* Lower socket lock held */ -static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) +static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len, + size_t max_msg_size, long timeo) { struct strparser *strp = (struct strparser *)desc->arg.data; - struct _strp_rx_msg *rxm; + struct _strp_msg *stm; struct sk_buff *head, *skb; size_t eaten = 0, cand_len; ssize_t extra; int err; bool cloned_orig = false; - if (strp->rx_paused) + if (strp->paused) return 0; - head = strp->rx_skb_head; + head = strp->skb_head; if (head) { /* Message already in progress */ - rxm = _strp_rx_msg(head); - if (unlikely(rxm->early_eaten)) { + stm = _strp_msg(head); + if (unlikely(stm->early_eaten)) { /* Already some number of bytes on the receive sock - * data saved in rx_skb_head, just indicate they + * data saved in skb_head, just indicate they * are consumed. */ - eaten = orig_len <= rxm->early_eaten ? - orig_len : rxm->early_eaten; - rxm->early_eaten -= eaten; + eaten = orig_len <= stm->early_eaten ? + orig_len : stm->early_eaten; + stm->early_eaten -= eaten; return eaten; } @@ -126,12 +137,12 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, */ orig_skb = skb_clone(orig_skb, GFP_ATOMIC); if (!orig_skb) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = -ENOMEM; return 0; } if (!pskb_pull(orig_skb, orig_offset)) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); kfree_skb(orig_skb); desc->error = -ENOMEM; return 0; @@ -140,13 +151,13 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, orig_offset = 0; } - if (!strp->rx_skb_nextp) { + if (!strp->skb_nextp) { /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ err = skb_unclone(head, GFP_ATOMIC); if (err) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = err; return 0; } @@ -165,20 +176,20 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, skb = alloc_skb(0, GFP_ATOMIC); if (!skb) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = -ENOMEM; return 0; } skb->len = head->len; skb->data_len = head->len; skb->truesize = head->truesize; - *_strp_rx_msg(skb) = *_strp_rx_msg(head); - strp->rx_skb_nextp = &head->next; + *_strp_msg(skb) = *_strp_msg(head); + strp->skb_nextp = &head->next; skb_shinfo(skb)->frag_list = head; - strp->rx_skb_head = skb; + strp->skb_head = skb; head = skb; } else { - strp->rx_skb_nextp = + strp->skb_nextp = &skb_shinfo(head)->frag_list; } } @@ -188,112 +199,112 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* Always clone since we will consume something */ skb = skb_clone(orig_skb, GFP_ATOMIC); if (!skb) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = -ENOMEM; break; } cand_len = orig_len - eaten; - head = strp->rx_skb_head; + head = strp->skb_head; if (!head) { head = skb; - strp->rx_skb_head = head; - /* Will set rx_skb_nextp on next packet if needed */ - strp->rx_skb_nextp = NULL; - rxm = _strp_rx_msg(head); - memset(rxm, 0, sizeof(*rxm)); - rxm->strp.offset = orig_offset + eaten; + strp->skb_head = head; + /* Will set skb_nextp on next packet if needed */ + strp->skb_nextp = NULL; + stm = _strp_msg(head); + memset(stm, 0, sizeof(*stm)); + stm->strp.offset = orig_offset + eaten; } else { /* Unclone since we may be appending to an skb that we * already share a frag_list with. */ err = skb_unclone(skb, GFP_ATOMIC); if (err) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = err; break; } - rxm = _strp_rx_msg(head); - *strp->rx_skb_nextp = skb; - strp->rx_skb_nextp = &skb->next; + stm = _strp_msg(head); + *strp->skb_nextp = skb; + strp->skb_nextp = &skb->next; head->data_len += skb->len; head->len += skb->len; head->truesize += skb->truesize; } - if (!rxm->strp.full_len) { + if (!stm->strp.full_len) { ssize_t len; len = (*strp->cb.parse_msg)(strp, head); if (!len) { /* Need more header to determine length */ - if (!rxm->accum_len) { + if (!stm->accum_len) { /* Start RX timer for new message */ - strp_start_rx_timer(strp); + strp_start_timer(strp, timeo); } - rxm->accum_len += cand_len; + stm->accum_len += cand_len; eaten += cand_len; - STRP_STATS_INCR(strp->stats.rx_need_more_hdr); + STRP_STATS_INCR(strp->stats.need_more_hdr); WARN_ON(eaten != orig_len); break; } else if (len < 0) { - if (len == -ESTRPIPE && rxm->accum_len) { + if (len == -ESTRPIPE && stm->accum_len) { len = -ENODATA; - strp->rx_unrecov_intr = 1; + strp->unrecov_intr = 1; } else { - strp->rx_interrupted = 1; + strp->interrupted = 1; } strp_parser_err(strp, len, desc); break; - } else if (len > strp->sk->sk_rcvbuf) { + } else if (len > max_msg_size) { /* Message length exceeds maximum allowed */ - STRP_STATS_INCR(strp->stats.rx_msg_too_big); + STRP_STATS_INCR(strp->stats.msg_too_big); strp_parser_err(strp, -EMSGSIZE, desc); break; } else if (len <= (ssize_t)head->len - - skb->len - rxm->strp.offset) { + skb->len - stm->strp.offset) { /* Length must be into new skb (and also * greater than zero) */ - STRP_STATS_INCR(strp->stats.rx_bad_hdr_len); + STRP_STATS_INCR(strp->stats.bad_hdr_len); strp_parser_err(strp, -EPROTO, desc); break; } - rxm->strp.full_len = len; + stm->strp.full_len = len; } - extra = (ssize_t)(rxm->accum_len + cand_len) - - rxm->strp.full_len; + extra = (ssize_t)(stm->accum_len + cand_len) - + stm->strp.full_len; if (extra < 0) { /* Message not complete yet. */ - if (rxm->strp.full_len - rxm->accum_len > + if (stm->strp.full_len - stm->accum_len > strp_peek_len(strp)) { - /* Don't have the whole messages in the socket - * buffer. Set strp->rx_need_bytes to wait for + /* Don't have the whole message in the socket + * buffer. Set strp->need_bytes to wait for * the rest of the message. Also, set "early * eaten" since we've already buffered the skb * but don't consume yet per strp_read_sock. */ - if (!rxm->accum_len) { + if (!stm->accum_len) { /* Start RX timer for new message */ - strp_start_rx_timer(strp); + strp_start_timer(strp, timeo); } - strp->rx_need_bytes = rxm->strp.full_len - - rxm->accum_len; - rxm->accum_len += cand_len; - rxm->early_eaten = cand_len; - STRP_STATS_ADD(strp->stats.rx_bytes, cand_len); + strp->need_bytes = stm->strp.full_len - + stm->accum_len; + stm->accum_len += cand_len; + stm->early_eaten = cand_len; + STRP_STATS_ADD(strp->stats.bytes, cand_len); desc->count = 0; /* Stop reading socket */ break; } - rxm->accum_len += cand_len; + stm->accum_len += cand_len; eaten += cand_len; WARN_ON(eaten != orig_len); break; @@ -308,14 +319,14 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, eaten += (cand_len - extra); /* Hurray, we have a new message! */ - del_timer(&strp->rx_msg_timer); - strp->rx_skb_head = NULL; - STRP_STATS_INCR(strp->stats.rx_msgs); + del_timer(&strp->msg_timer); + strp->skb_head = NULL; + STRP_STATS_INCR(strp->stats.msgs); /* Give skb to upper layer */ strp->cb.rcv_msg(strp, head); - if (unlikely(strp->rx_paused)) { + if (unlikely(strp->paused)) { /* Upper layer paused strp */ break; } @@ -324,11 +335,33 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, if (cloned_orig) kfree_skb(orig_skb); - STRP_STATS_ADD(strp->stats.rx_bytes, eaten); + STRP_STATS_ADD(strp->stats.bytes, eaten); return eaten; } +int strp_process(struct strparser *strp, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len, + size_t max_msg_size, long timeo) +{ + read_descriptor_t desc; /* Dummy arg to strp_recv */ + + desc.arg.data = strp; + + return __strp_recv(&desc, orig_skb, orig_offset, orig_len, + max_msg_size, timeo); +} +EXPORT_SYMBOL_GPL(strp_process); + +static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) +{ + struct strparser *strp = (struct strparser *)desc->arg.data; + + return __strp_recv(desc, orig_skb, orig_offset, orig_len, + strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo); +} + static int default_read_sock_done(struct strparser *strp, int err) { return err; @@ -355,101 +388,129 @@ static int strp_read_sock(struct strparser *strp) /* Lower sock lock held */ void strp_data_ready(struct strparser *strp) { - if (unlikely(strp->rx_stopped)) + if (unlikely(strp->stopped)) return; - /* This check is needed to synchronize with do_strp_rx_work. - * do_strp_rx_work acquires a process lock (lock_sock) whereas + /* This check is needed to synchronize with do_strp_work. + * do_strp_work acquires a process lock (lock_sock) whereas * the lock held here is bh_lock_sock. The two locks can be * held by different threads at the same time, but bh_lock_sock * allows a thread in BH context to safely check if the process * lock is held. In this case, if the lock is held, queue work. */ if (sock_owned_by_user(strp->sk)) { - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); return; } - if (strp->rx_paused) + if (strp->paused) return; - if (strp->rx_need_bytes) { - if (strp_peek_len(strp) >= strp->rx_need_bytes) - strp->rx_need_bytes = 0; + if (strp->need_bytes) { + if (strp_peek_len(strp) >= strp->need_bytes) + strp->need_bytes = 0; else return; } if (strp_read_sock(strp) == -ENOMEM) - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); } EXPORT_SYMBOL_GPL(strp_data_ready); -static void do_strp_rx_work(struct strparser *strp) +static void do_strp_work(struct strparser *strp) { read_descriptor_t rd_desc; - struct sock *csk = strp->sk; /* We need the read lock to synchronize with strp_data_ready. We * need the socket lock for calling strp_read_sock. */ - lock_sock(csk); + strp->cb.lock(strp); - if (unlikely(strp->rx_stopped)) + if (unlikely(strp->stopped)) goto out; - if (strp->rx_paused) + if (strp->paused) goto out; rd_desc.arg.data = strp; if (strp_read_sock(strp) == -ENOMEM) - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); out: - release_sock(csk); + strp->cb.unlock(strp); } -static void strp_rx_work(struct work_struct *w) +static void strp_work(struct work_struct *w) { - do_strp_rx_work(container_of(w, struct strparser, rx_work)); + do_strp_work(container_of(w, struct strparser, work)); } -static void strp_rx_msg_timeout(unsigned long arg) +static void strp_msg_timeout(unsigned long arg) { struct strparser *strp = (struct strparser *)arg; /* Message assembly timed out */ - STRP_STATS_INCR(strp->stats.rx_msg_timeouts); - lock_sock(strp->sk); + STRP_STATS_INCR(strp->stats.msg_timeouts); + strp->cb.lock(strp); strp->cb.abort_parser(strp, ETIMEDOUT); + strp->cb.unlock(strp); +} + +static void strp_sock_lock(struct strparser *strp) +{ + lock_sock(strp->sk); +} + +static void strp_sock_unlock(struct strparser *strp) +{ release_sock(strp->sk); } -int strp_init(struct strparser *strp, struct sock *csk, +int strp_init(struct strparser *strp, struct sock *sk, struct strp_callbacks *cb) { - struct socket *sock = csk->sk_socket; if (!cb || !cb->rcv_msg || !cb->parse_msg) return -EINVAL; - if (!sock->ops->read_sock || !sock->ops->peek_len) - return -EAFNOSUPPORT; + /* The sk (sock) arg determines the mode of the stream parser. + * + * If the sock is set then the strparser is in receive callback mode. + * The upper layer calls strp_data_ready to kick receive processing + * and strparser calls the read_sock function on the socket to + * get packets. + * + * If the sock is not set then the strparser is in general mode. + * The upper layer calls strp_process for each skb to be parsed. + */ - memset(strp, 0, sizeof(*strp)); + if (sk) { + struct socket *sock = sk->sk_socket; - strp->sk = csk; + if (!sock->ops->read_sock || !sock->ops->peek_len) + return -EAFNOSUPPORT; + } else { + if (!cb->lock || !cb->unlock) + return -EINVAL; + } - setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout, - (unsigned long)strp); + memset(strp, 0, sizeof(*strp)); - INIT_WORK(&strp->rx_work, strp_rx_work); + strp->sk = sk; + strp->cb.lock = cb->lock ? : strp_sock_lock; + strp->cb.unlock = cb->unlock ? : strp_sock_unlock; strp->cb.rcv_msg = cb->rcv_msg; strp->cb.parse_msg = cb->parse_msg; strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; - strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp; + strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; + + setup_timer(&strp->msg_timer, strp_msg_timeout, + (unsigned long)strp); + + INIT_WORK(&strp->work, strp_work); return 0; } @@ -457,12 +518,12 @@ EXPORT_SYMBOL_GPL(strp_init); void strp_unpause(struct strparser *strp) { - strp->rx_paused = 0; + strp->paused = 0; - /* Sync setting rx_paused with RX work */ + /* Sync setting paused with RX work */ smp_mb(); - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); } EXPORT_SYMBOL_GPL(strp_unpause); @@ -471,27 +532,27 @@ EXPORT_SYMBOL_GPL(strp_unpause); */ void strp_done(struct strparser *strp) { - WARN_ON(!strp->rx_stopped); + WARN_ON(!strp->stopped); - del_timer_sync(&strp->rx_msg_timer); - cancel_work_sync(&strp->rx_work); + del_timer_sync(&strp->msg_timer); + cancel_work_sync(&strp->work); - if (strp->rx_skb_head) { - kfree_skb(strp->rx_skb_head); - strp->rx_skb_head = NULL; + if (strp->skb_head) { + kfree_skb(strp->skb_head); + strp->skb_head = NULL; } } EXPORT_SYMBOL_GPL(strp_done); void strp_stop(struct strparser *strp) { - strp->rx_stopped = 1; + strp->stopped = 1; } EXPORT_SYMBOL_GPL(strp_stop); void strp_check_rcv(struct strparser *strp) { - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); } EXPORT_SYMBOL_GPL(strp_check_rcv); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 25dc67ef9d37..0531b41d1f2d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -343,8 +343,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: return sizeof(struct switchdev_obj_port_vlan); - case SWITCHDEV_OBJ_ID_PORT_FDB: - return sizeof(struct switchdev_obj_port_fdb); case SWITCHDEV_OBJ_ID_PORT_MDB: return sizeof(struct switchdev_obj_port_mdb); default: @@ -534,43 +532,6 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); -/** - * switchdev_port_obj_dump - Dump port objects - * - * @dev: port device - * @id: object ID - * @obj: object to dump - * @cb: function to call with a filled object - * - * rtnl_lock must be held. - */ -int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct list_head *iter; - int err = -EOPNOTSUPP; - - ASSERT_RTNL(); - - if (ops && ops->switchdev_port_obj_dump) - return ops->switchdev_port_obj_dump(dev, obj, cb); - - /* Switch device port(s) may be stacked under - * bond/team/vlan dev, so recurse down to dump objects on - * first port at bottom of stack. - */ - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_obj_dump(lower_dev, obj, cb); - break; - } - - return err; -} -EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); - static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); /** @@ -613,486 +574,6 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); -struct switchdev_vlan_dump { - struct switchdev_obj_port_vlan vlan; - struct sk_buff *skb; - u32 filter_mask; - u16 flags; - u16 begin; - u16 end; -}; - -static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump) -{ - struct bridge_vlan_info vinfo; - - vinfo.flags = dump->flags; - - if (dump->begin == 0 && dump->end == 0) { - return 0; - } else if (dump->begin == dump->end) { - vinfo.vid = dump->begin; - if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - return -EMSGSIZE; - } else { - vinfo.vid = dump->begin; - vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; - if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - return -EMSGSIZE; - vinfo.vid = dump->end; - vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; - vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; - if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - return -EMSGSIZE; - } - - return 0; -} - -static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj) -{ - struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - struct switchdev_vlan_dump *dump = - container_of(vlan, struct switchdev_vlan_dump, vlan); - int err = 0; - - if (vlan->vid_begin > vlan->vid_end) - return -EINVAL; - - if (dump->filter_mask & RTEXT_FILTER_BRVLAN) { - dump->flags = vlan->flags; - for (dump->begin = dump->end = vlan->vid_begin; - dump->begin <= vlan->vid_end; - dump->begin++, dump->end++) { - err = switchdev_port_vlan_dump_put(dump); - if (err) - return err; - } - } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) { - if (dump->begin > vlan->vid_begin && - dump->begin >= vlan->vid_end) { - if ((dump->begin - 1) == vlan->vid_end && - dump->flags == vlan->flags) { - /* prepend */ - dump->begin = vlan->vid_begin; - } else { - err = switchdev_port_vlan_dump_put(dump); - dump->flags = vlan->flags; - dump->begin = vlan->vid_begin; - dump->end = vlan->vid_end; - } - } else if (dump->end <= vlan->vid_begin && - dump->end < vlan->vid_end) { - if ((dump->end + 1) == vlan->vid_begin && - dump->flags == vlan->flags) { - /* append */ - dump->end = vlan->vid_end; - } else { - err = switchdev_port_vlan_dump_put(dump); - dump->flags = vlan->flags; - dump->begin = vlan->vid_begin; - dump->end = vlan->vid_end; - } - } else { - err = -EINVAL; - } - } - - return err; -} - -static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, - u32 filter_mask) -{ - struct switchdev_vlan_dump dump = { - .vlan.obj.orig_dev = dev, - .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .skb = skb, - .filter_mask = filter_mask, - }; - int err = 0; - - if ((filter_mask & RTEXT_FILTER_BRVLAN) || - (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { - err = switchdev_port_obj_dump(dev, &dump.vlan.obj, - switchdev_port_vlan_dump_cb); - if (err) - goto err_out; - if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) - /* last one */ - err = switchdev_port_vlan_dump_put(&dump); - } - -err_out: - return err == -EOPNOTSUPP ? 0 : err; -} - -/** - * switchdev_port_bridge_getlink - Get bridge port attributes - * - * @dev: port device - * - * Called for SELF on rtnl_bridge_getlink to get bridge port - * attributes. - */ -int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask, - int nlflags) -{ - struct switchdev_attr attr = { - .orig_dev = dev, - .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, - }; - u16 mode = BRIDGE_MODE_UNDEF; - u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; - int err; - - if (!netif_is_bridge_port(dev)) - return -EOPNOTSUPP; - - err = switchdev_port_attr_get(dev, &attr); - if (err && err != -EOPNOTSUPP) - return err; - - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - attr.u.brport_flags, mask, nlflags, - filter_mask, switchdev_port_vlan_fill); -} -EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); - -static int switchdev_port_br_setflag(struct net_device *dev, - struct nlattr *nlattr, - unsigned long brport_flag) -{ - struct switchdev_attr attr = { - .orig_dev = dev, - .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, - }; - u8 flag = nla_get_u8(nlattr); - int err; - - err = switchdev_port_attr_get(dev, &attr); - if (err) - return err; - - if (flag) - attr.u.brport_flags |= brport_flag; - else - attr.u.brport_flags &= ~brport_flag; - - return switchdev_port_attr_set(dev, &attr); -} - -static const struct nla_policy -switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = { - [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, - [IFLA_BRPORT_COST] = { .type = NLA_U32 }, - [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, - [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, - [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, - [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, - [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 }, - [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, - [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 }, - [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, -}; - -static int switchdev_port_br_setlink_protinfo(struct net_device *dev, - struct nlattr *protinfo) -{ - struct nlattr *attr; - int rem; - int err; - - err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, - switchdev_port_bridge_policy, NULL); - if (err) - return err; - - nla_for_each_nested(attr, protinfo, rem) { - switch (nla_type(attr)) { - case IFLA_BRPORT_LEARNING: - err = switchdev_port_br_setflag(dev, attr, - BR_LEARNING); - break; - case IFLA_BRPORT_LEARNING_SYNC: - err = switchdev_port_br_setflag(dev, attr, - BR_LEARNING_SYNC); - break; - case IFLA_BRPORT_UNICAST_FLOOD: - err = switchdev_port_br_setflag(dev, attr, BR_FLOOD); - break; - default: - err = -EOPNOTSUPP; - break; - } - if (err) - return err; - } - - return 0; -} - -static int switchdev_port_br_afspec(struct net_device *dev, - struct nlattr *afspec, - int (*f)(struct net_device *dev, - const struct switchdev_obj *obj)) -{ - struct nlattr *attr; - struct bridge_vlan_info *vinfo; - struct switchdev_obj_port_vlan vlan = { - .obj.orig_dev = dev, - .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - }; - int rem; - int err; - - nla_for_each_nested(attr, afspec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) - continue; - if (nla_len(attr) != sizeof(struct bridge_vlan_info)) - return -EINVAL; - vinfo = nla_data(attr); - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; - vlan.flags = vinfo->flags; - if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (vlan.vid_begin) - return -EINVAL; - vlan.vid_begin = vinfo->vid; - /* don't allow range of pvids */ - if (vlan.flags & BRIDGE_VLAN_INFO_PVID) - return -EINVAL; - } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { - if (!vlan.vid_begin) - return -EINVAL; - vlan.vid_end = vinfo->vid; - if (vlan.vid_end <= vlan.vid_begin) - return -EINVAL; - err = f(dev, &vlan.obj); - if (err) - return err; - vlan.vid_begin = 0; - } else { - if (vlan.vid_begin) - return -EINVAL; - vlan.vid_begin = vinfo->vid; - vlan.vid_end = vinfo->vid; - err = f(dev, &vlan.obj); - if (err) - return err; - vlan.vid_begin = 0; - } - } - - return 0; -} - -/** - * switchdev_port_bridge_setlink - Set bridge port attributes - * - * @dev: port device - * @nlh: netlink header - * @flags: netlink flags - * - * Called for SELF on rtnl_bridge_setlink to set bridge port - * attributes. - */ -int switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - struct nlattr *protinfo; - struct nlattr *afspec; - int err = 0; - - if (!netif_is_bridge_port(dev)) - return -EOPNOTSUPP; - - protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), - IFLA_PROTINFO); - if (protinfo) { - err = switchdev_port_br_setlink_protinfo(dev, protinfo); - if (err) - return err; - } - - afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), - IFLA_AF_SPEC); - if (afspec) - err = switchdev_port_br_afspec(dev, afspec, - switchdev_port_obj_add); - - return err; -} -EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); - -/** - * switchdev_port_bridge_dellink - Set bridge port attributes - * - * @dev: port device - * @nlh: netlink header - * @flags: netlink flags - * - * Called for SELF on rtnl_bridge_dellink to set bridge port - * attributes. - */ -int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - struct nlattr *afspec; - - if (!netif_is_bridge_port(dev)) - return -EOPNOTSUPP; - - afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), - IFLA_AF_SPEC); - if (afspec) - return switchdev_port_br_afspec(dev, afspec, - switchdev_port_obj_del); - - return 0; -} -EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); - -/** - * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port - * - * @ndmsg: netlink hdr - * @nlattr: netlink attributes - * @dev: port device - * @addr: MAC address to add - * @vid: VLAN to add - * - * Add FDB entry to switch device. - */ -int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid, u16 nlm_flags) -{ - struct switchdev_obj_port_fdb fdb = { - .obj.orig_dev = dev, - .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .vid = vid, - }; - - ether_addr_copy(fdb.addr, addr); - return switchdev_port_obj_add(dev, &fdb.obj); -} -EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); - -/** - * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port - * - * @ndmsg: netlink hdr - * @nlattr: netlink attributes - * @dev: port device - * @addr: MAC address to delete - * @vid: VLAN to delete - * - * Delete FDB entry from switch device. - */ -int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid) -{ - struct switchdev_obj_port_fdb fdb = { - .obj.orig_dev = dev, - .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .vid = vid, - }; - - ether_addr_copy(fdb.addr, addr); - return switchdev_port_obj_del(dev, &fdb.obj); -} -EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); - -struct switchdev_fdb_dump { - struct switchdev_obj_port_fdb fdb; - struct net_device *dev; - struct sk_buff *skb; - struct netlink_callback *cb; - int idx; -}; - -static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) -{ - struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj); - struct switchdev_fdb_dump *dump = - container_of(fdb, struct switchdev_fdb_dump, fdb); - u32 portid = NETLINK_CB(dump->cb->skb).portid; - u32 seq = dump->cb->nlh->nlmsg_seq; - struct nlmsghdr *nlh; - struct ndmsg *ndm; - - if (dump->idx < dump->cb->args[2]) - goto skip; - - nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, - sizeof(*ndm), NLM_F_MULTI); - if (!nlh) - return -EMSGSIZE; - - ndm = nlmsg_data(nlh); - ndm->ndm_family = AF_BRIDGE; - ndm->ndm_pad1 = 0; - ndm->ndm_pad2 = 0; - ndm->ndm_flags = NTF_SELF; - ndm->ndm_type = 0; - ndm->ndm_ifindex = dump->dev->ifindex; - ndm->ndm_state = fdb->ndm_state; - - if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr)) - goto nla_put_failure; - - if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid)) - goto nla_put_failure; - - nlmsg_end(dump->skb, nlh); - -skip: - dump->idx++; - return 0; - -nla_put_failure: - nlmsg_cancel(dump->skb, nlh); - return -EMSGSIZE; -} - -/** - * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries - * - * @skb: netlink skb - * @cb: netlink callback - * @dev: port device - * @filter_dev: filter device - * @idx: - * - * Dump FDB entries from switch device. - */ -int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, int *idx) -{ - struct switchdev_fdb_dump dump = { - .fdb.obj.orig_dev = dev, - .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .dev = dev, - .skb = skb, - .cb = cb, - .idx = *idx, - }; - int err; - - err = switchdev_port_obj_dump(dev, &dump.fdb.obj, - switchdev_port_fdb_dump_cb); - *idx = dump.idx; - return err; -} -EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); - bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..5c53f22d62e8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1528,26 +1528,13 @@ static inline bool too_many_unix_fds(struct task_struct *p) return false; } -#define MAX_RECURSION_LEVEL 4 - static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; - unsigned char max_level = 0; if (too_many_unix_fds(current)) return -ETOOMANYREFS; - for (i = scm->fp->count - 1; i >= 0; i--) { - struct sock *sk = unix_get_socket(scm->fp->fp[i]); - - if (sk) - max_level = max(max_level, - unix_sk(sk)->recursion_level); - } - if (unlikely(max_level > MAX_RECURSION_LEVEL)) - return -ETOOMANYREFS; - /* * Need to duplicate file references for the sake of garbage * collection. Otherwise a socket in the fps might become a @@ -1559,7 +1546,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count - 1; i >= 0; i--) unix_inflight(scm->fp->user, scm->fp->fp[i]); - return max_level; + return 0; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1649,7 +1636,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb; long timeo; struct scm_cookie scm; - int max_level; int data_len = 0; int sk_locked; @@ -1701,7 +1687,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; - max_level = err + 1; skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1819,8 +1804,6 @@ restart_locked: __net_timestamp(skb); maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); @@ -1855,7 +1838,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int sent = 0; struct scm_cookie scm; bool fds_sent = false; - int max_level; int data_len; wait_for_unix_gc(); @@ -1905,7 +1887,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, kfree_skb(skb); goto out_err; } - max_level = err + 1; fds_sent = true; skb_put(skb, size - data_len); @@ -1925,8 +1906,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2324,7 +2303,6 @@ redo: last_len = last ? last->len : 0; again: if (skb == NULL) { - unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5a1a98df3499..ac095936552d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -74,7 +74,7 @@ DEFINE_RWLOCK(x25_list_lock); static const struct proto_ops x25_proto_ops; -static struct x25_address null_x25_address = {" "}; +static const struct x25_address null_x25_address = {" "}; #ifdef CONFIG_COMPAT struct compat_x25_subscrip_struct { diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f7e8bfa0c2d..5cd7a244e88d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev) static int xfrm_dev_unregister(struct net_device *dev) { + xfrm_policy_cache_flush(); return NOTIFY_DONE; } @@ -175,8 +176,7 @@ static int xfrm_dev_down(struct net_device *dev) if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); - xfrm_garbage_collect(dev_net(dev)); - + xfrm_policy_cache_flush(); return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ff61d8557929..8da428f56aec 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -24,6 +24,7 @@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> +#include <linux/cpu.h> #include <linux/audit.h> #include <net/dst.h> #include <net/flow.h> @@ -44,6 +45,8 @@ struct xfrm_flo { u8 flags; }; +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); +static struct work_struct *xfrm_pcpu_work __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; @@ -246,36 +249,6 @@ expired: xfrm_pol_put(xp); } -static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - if (unlikely(pol->walk.dead)) - flo = NULL; - else - xfrm_pol_hold(pol); - - return flo; -} - -static int xfrm_policy_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - return !pol->walk.dead; -} - -static void xfrm_policy_flo_delete(struct flow_cache_object *flo) -{ - xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); -} - -static const struct flow_cache_ops xfrm_policy_fc_ops = { - .get = xfrm_policy_flo_get, - .check = xfrm_policy_flo_check, - .delete = xfrm_policy_flo_delete, -}; - /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ @@ -298,7 +271,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) (unsigned long)policy); setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, (unsigned long)policy); - policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@ -798,7 +770,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); __xfrm_policy_link(policy, dir); - atomic_inc(&net->xfrm.flow_cache_genid); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) @@ -1004,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) } if (!cnt) err = -ESRCH; + else + xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@ -1175,7 +1148,7 @@ fail: } static struct xfrm_policy * -__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; @@ -1187,61 +1160,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } -static int flow_to_policy_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - - switch (dir) { - default: - case FLOW_DIR_IN: - return XFRM_POLICY_IN; - case FLOW_DIR_OUT: - return XFRM_POLICY_OUT; - case FLOW_DIR_FWD: - return XFRM_POLICY_FWD; - } -} - -static struct flow_cache_object * -xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, - u8 dir, struct flow_cache_object *old_obj, void *ctx) -{ - struct xfrm_policy *pol; - - if (old_obj) - xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - - pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); - if (IS_ERR_OR_NULL(pol)) - return ERR_CAST(pol); - - /* Resolver returns two references: - * one for cache and one for caller of flow_cache_lookup() */ - xfrm_pol_hold(pol); - - return &pol->flo; -} - -static inline int policy_to_flow_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - } -} - static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family) { @@ -1261,7 +1179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, } err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, - policy_to_flow_dir(dir)); + dir); if (!err) { if (!xfrm_pol_hold_rcu(pol)) goto again; @@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family) return tos; } -static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (xdst->route == NULL) { - /* Dummy bundle - if it has xfrms we were not - * able to build bundle as template resolution failed. - * It means we need to try again resolving. */ - if (xdst->num_xfrms > 0) - return NULL; - } else if (dst->flags & DST_XFRM_QUEUE) { - return NULL; - } else { - /* Real bundle */ - if (stale_bundle(dst)) - return NULL; - } - - dst_hold(dst); - return flo; -} - -static int xfrm_bundle_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (!xdst->route) - return 0; - if (stale_bundle(dst)) - return 0; - - return 1; -} - -static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - dst->obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(dst); -} - -static const struct flow_cache_ops xfrm_bundle_fc_ops = { - .get = xfrm_bundle_flo_get, - .check = xfrm_bundle_flo_check, - .delete = xfrm_bundle_flo_delete, -}; - static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) struct dst_entry *dst = &xdst->u.dst; memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); - xdst->flo.ops = &xfrm_bundle_fc_ops; } else xdst = ERR_PTR(-ENOBUFS); @@ -1840,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, } +static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old) +{ + this_cpu_write(xfrm_last_dst, xdst); + if (old) + dst_release(&old->u.dst); +} + +static void __xfrm_pcpu_work_fn(void) +{ + struct xfrm_dst *old; + + old = this_cpu_read(xfrm_last_dst); + if (old && !xfrm_bundle_ok(old)) + xfrm_last_dst_update(NULL, old); +} + +static void xfrm_pcpu_work_fn(struct work_struct *work) +{ + local_bh_disable(); + rcu_read_lock(); + __xfrm_pcpu_work_fn(); + rcu_read_unlock(); + local_bh_enable(); +} + +void xfrm_policy_cache_flush(void) +{ + struct xfrm_dst *old; + bool found = 0; + int cpu; + + local_bh_disable(); + rcu_read_lock(); + for_each_possible_cpu(cpu) { + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + if (smp_processor_id() == cpu) { + __xfrm_pcpu_work_fn(); + continue; + } + found = true; + break; + } + } + + rcu_read_unlock(); + local_bh_enable(); + + if (!found) + return; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + bool bundle_release; + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + bundle_release = old && !xfrm_bundle_ok(old); + rcu_read_unlock(); + + if (!bundle_release) + continue; + + if (cpu_online(cpu)) { + schedule_work_on(cpu, &xfrm_pcpu_work[cpu]); + continue; + } + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + per_cpu(xfrm_last_dst, cpu) = NULL; + dst_release(&old->u.dst); + } + rcu_read_unlock(); + } + + put_online_cpus(); +} + +static bool xfrm_pol_dead(struct xfrm_dst *xdst) +{ + unsigned int num_pols = xdst->num_pols; + unsigned int pol_dead = 0, i; + + for (i = 0; i < num_pols; i++) + pol_dead |= xdst->pols[i]->walk.dead; + + /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ + if (pol_dead) + xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + + return pol_dead; +} + static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, @@ -1847,10 +1808,23 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct xfrm_dst *xdst, *old; struct dst_entry *dst; - struct xfrm_dst *xdst; int err; + xdst = this_cpu_read(xfrm_last_dst); + if (xdst && + xdst->u.dst.dev == dst_orig->dev && + xdst->num_pols == num_pols && + !xfrm_pol_dead(xdst) && + memcmp(xdst->pols, pols, + sizeof(struct xfrm_policy *) * num_pols) == 0 && + xfrm_bundle_ok(xdst)) { + dst_hold(&xdst->u.dst); + return xdst; + } + + old = xdst; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { @@ -1871,6 +1845,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); + atomic_set(&xdst->u.dst.__refcnt, 2); + xfrm_last_dst_update(xdst, old); + return xdst; } @@ -2051,86 +2028,39 @@ free_dst: goto out; } -static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, - struct flow_cache_object *oldflo, void *ctx) +static struct xfrm_dst * +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo) { - struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct xfrm_dst *xdst, *new_xdst; - int num_pols = 0, num_xfrms = 0, i, err, pol_dead; - - /* Check if the policies from old bundle are usable */ - xdst = NULL; - if (oldflo) { - xdst = container_of(oldflo, struct xfrm_dst, flo); - num_pols = xdst->num_pols; - num_xfrms = xdst->num_xfrms; - pol_dead = 0; - for (i = 0; i < num_pols; i++) { - pols[i] = xdst->pols[i]; - pol_dead |= pols[i]->walk.dead; - } - if (pol_dead) { - /* Mark DST_OBSOLETE_DEAD to fail the next - * xfrm_dst_check() - */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - xdst = NULL; - num_pols = 0; - num_xfrms = 0; - oldflo = NULL; - } - } + int num_pols = 0, num_xfrms = 0, err; + struct xfrm_dst *xdst; /* Resolve policies to use if we couldn't get them from * previous cache entry */ - if (xdst == NULL) { - num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, - flow_to_policy_dir(dir)); - err = xfrm_expand_policies(fl, family, pols, + num_pols = 1; + pols[0] = xfrm_policy_lookup(net, fl, family, dir); + err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); - if (err < 0) - goto inc_error; - if (num_pols == 0) - return NULL; - if (num_xfrms <= 0) - goto make_dummy_bundle; - } + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); - if (IS_ERR(new_xdst)) { - err = PTR_ERR(new_xdst); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); if (err != -EAGAIN) goto error; - if (oldflo == NULL) - goto make_dummy_bundle; - dst_hold(&xdst->u.dst); - return oldflo; - } else if (new_xdst == NULL) { + goto make_dummy_bundle; + } else if (xdst == NULL) { num_xfrms = 0; - if (oldflo == NULL) - goto make_dummy_bundle; - xdst->num_xfrms = 0; - dst_hold(&xdst->u.dst); - return oldflo; - } - - /* Kill the previous bundle */ - if (xdst) { - /* The policies were stolen for newly generated bundle */ - xdst->num_pols = 0; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); + goto make_dummy_bundle; } - /* We do need to return one reference for original caller */ - dst_hold(&new_xdst->u.dst); - return &new_xdst->flo; + return xdst; make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: @@ -2146,17 +2076,12 @@ make_dummy_bundle: memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); dst_hold(&xdst->u.dst); - return &xdst->flo; + return xdst; inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: - if (xdst != NULL) { - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - } else - xfrm_pols_put(pols, num_pols); + xfrm_pols_put(pols, num_pols); return ERR_PTR(err); } @@ -2187,11 +2112,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct flow_cache_object *flo; struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + u8 dir = XFRM_POLICY_OUT; int i, err, num_pols, num_xfrms = 0, drop_pols = 0; dst = NULL; @@ -2242,15 +2166,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, &xflo); - if (flo == NULL) + xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo); + if (xdst == NULL) goto nopol; - if (IS_ERR(flo)) { - err = PTR_ERR(flo); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); goto dropdst; } - xdst = container_of(flo, struct xfrm_dst, flo); num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; @@ -2449,12 +2371,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int pi; int reverse; struct flowi fl; - u8 fl_dir; int xerr_idx = -1; reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; - fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); @@ -2486,16 +2406,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } } - if (!pol) { - struct flow_cache_object *flo; - - flo = flow_cache_lookup(net, &fl, family, fl_dir, - xfrm_policy_lookup, NULL); - if (IS_ERR_OR_NULL(flo)) - pol = ERR_CAST(flo); - else - pol = container_of(flo, struct xfrm_policy, flo); - } + if (!pol) + pol = xfrm_policy_lookup(net, &fl, family, dir); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); @@ -2641,11 +2553,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * notice. That's what we are validating here via the * stale_bundle() check. * - * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will - * be marked on it. * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * be marked on it. - * Both will force stable_bundle() to fail on any xdst bundle with + * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ if (dst->obsolete < 0 && !stale_bundle(dst)) @@ -2685,18 +2595,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -void xfrm_garbage_collect(struct net *net) -{ - flow_cache_flush(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect); - -void xfrm_garbage_collect_deferred(struct net *net) -{ - flow_cache_flush_deferred(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect_deferred); - static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -3034,14 +2932,9 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; - rv = flow_cache_init(net); - if (rv < 0) - goto out; return 0; -out: - xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -3054,7 +2947,6 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { - flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -3068,7 +2960,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { - flow_cache_hp_init(); + int i; + + xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work), + GFP_KERNEL); + BUG_ON(!xfrm_pcpu_work); + + for (i = 0; i < NR_CPUS; i++) + INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); + register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 6c0956d10db6..82cbbce69b79 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -724,9 +724,10 @@ restart: } } } - if (cnt) + if (cnt) { err = 0; - + xfrm_policy_cache_flush(); + } out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2be4c6af008a..1b539b7dcfab 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; return err; } - xfrm_garbage_collect(net); c.data.type = type; c.event = nlh->nlmsg_type; |