diff options
Diffstat (limited to 'net')
353 files changed, 8199 insertions, 4326 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e97ab824e368..9ee5787634e5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -562,8 +562,7 @@ static int vlan_dev_init(struct net_device *dev) NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; - dev->features |= real_dev->vlan_features | NETIF_F_LLTX | - NETIF_F_GSO_SOFTWARE; + dev->features |= dev->hw_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; dev->gso_max_segs = real_dev->gso_max_segs; if (dev->features & NETIF_F_VLAN_FEATURES) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 1270207f3d7c..9c94aad153b3 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -35,7 +35,8 @@ static inline int vlan_validate_qos_map(struct nlattr *attr) { if (!attr) return 0; - return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy); + return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy, + NULL); } static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) diff --git a/net/Makefile b/net/Makefile index 9b681550e3a3..9086ffbb5085 100644 --- a/net/Makefile +++ b/net/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ diff --git a/net/atm/clip.c b/net/atm/clip.c index 53b4ac09e7b7..ec527b62f79d 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -106,7 +106,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) entry->expires = jiffies - 1; /* force resolution or expiration */ error = neigh_update(entry->neigh, NULL, NUD_NONE, - NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_ADMIN, 0); if (error) pr_crit("neigh_update failed with %d\n", error); goto out; @@ -481,7 +481,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) link_vcc(clip_vcc, entry); } error = neigh_update(neigh, llc_oui, NUD_PERMANENT, - NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0); neigh_release(neigh); return error; } diff --git a/net/atm/common.c b/net/atm/common.c index 9613381f5db0..f06422f4108d 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -62,21 +62,16 @@ static void vcc_remove_socket(struct sock *sk) write_unlock_irq(&vcc_sklist_lock); } -static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size) +static bool vcc_tx_ready(struct atm_vcc *vcc, unsigned int size) { - struct sk_buff *skb; struct sock *sk = sk_atm(vcc); if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) { pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); - return NULL; + return false; } - while (!(skb = alloc_skb(size, GFP_KERNEL))) - schedule(); - pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); - atomic_add(skb->truesize, &sk->sk_wmem_alloc); - return skb; + return true; } static void vcc_sock_destruct(struct sock *sk) @@ -606,7 +601,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) eff = (size+3) & ~3; /* align to word boundary */ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); error = 0; - while (!(skb = alloc_tx(vcc, eff))) { + while (!vcc_tx_ready(vcc, eff)) { if (m->msg_flags & MSG_DONTWAIT) { error = -EAGAIN; break; @@ -628,6 +623,15 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) finish_wait(sk_sleep(sk), &wait); if (error) goto out; + + skb = alloc_skb(eff, GFP_KERNEL); + if (!skb) { + error = -ENOMEM; + goto out; + } + pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); + atomic_add(skb->truesize, &sk->sk_wmem_alloc); + skb->dev = NULL; /* for paths shared with net_device interfaces */ ATM_SKB(skb)->atm_options = vcc->atm_options; if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 7c3d994e90d8..495ba7cdcb04 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -679,15 +679,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_forw_packet *forw_packet_aggr; + struct sk_buff *skb; unsigned char *skb_buff; unsigned int skb_size; atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, - queue_left, bat_priv); - if (!forw_packet_aggr) - return; - if (atomic_read(&bat_priv->aggregated_ogms) && packet_len < BATADV_MAX_AGGREGATION_BYTES) skb_size = BATADV_MAX_AGGREGATION_BYTES; @@ -696,9 +692,14 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb_size += ETH_HLEN; - forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); - if (!forw_packet_aggr->skb) { - batadv_forw_packet_free(forw_packet_aggr, true); + skb = netdev_alloc_skb_ip_align(NULL, skb_size); + if (!skb) + return; + + forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, + queue_left, bat_priv, skb); + if (!forw_packet_aggr) { + kfree_skb(skb); return; } @@ -2477,6 +2478,16 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) batadv_iv_ogm_schedule(hard_iface); } +/** + * batadv_iv_init_sel_class - initialize GW selection class + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv) +{ + /* set default TQ difference threshold to 20 */ + atomic_set(&bat_priv->gw.sel_class, 20); +} + static struct batadv_gw_node * batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { @@ -2823,6 +2834,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .del_if = batadv_iv_ogm_orig_del_if, }, .gw = { + .init_sel_class = batadv_iv_init_sel_class, .get_best_gw_node = batadv_iv_gw_get_best_gw_node, .is_eligible = batadv_iv_gw_is_eligible, #ifdef CONFIG_BATMAN_ADV_DEBUGFS diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 0acd081dd286..a36c8e7291d6 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -668,6 +668,16 @@ err_ifinfo1: return ret; } +/** + * batadv_v_init_sel_class - initialize GW selection class + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_v_init_sel_class(struct batadv_priv *bat_priv) +{ + /* set default throughput difference threshold to 5Mbps */ + atomic_set(&bat_priv->gw.sel_class, 50); +} + static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv, char *buff, size_t count) { @@ -1052,6 +1062,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .dump = batadv_v_orig_dump, }, .gw = { + .init_sel_class = batadv_v_init_sel_class, .store_sel_class = batadv_v_store_sel_class, .show_sel_class = batadv_v_show_sel_class, .get_best_gw_node = batadv_v_gw_get_best_gw_node, @@ -1092,9 +1103,6 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv) if (ret < 0) return ret; - /* set default throughput difference threshold to 5Mbps */ - atomic_set(&bat_priv->gw.sel_class, 50); - return 0; } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ba8420d8a992..d07e89ec8467 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -395,7 +395,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): CLAIM %pM on vid %d\n", mac, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame @@ -404,7 +404,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: /* announcement frame @@ -413,7 +413,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", - ethhdr->h_source, BATADV_PRINT_VID(vid)); + ethhdr->h_source, batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame @@ -425,14 +425,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_LOOPDETECT: ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; } @@ -475,9 +475,9 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) batadv_info(bat_priv->soft_iface, "Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n", - BATADV_PRINT_VID(backbone_gw->vid)); + batadv_print_vid(backbone_gw->vid)); snprintf(vid_str, sizeof(vid_str), "%d", - BATADV_PRINT_VID(backbone_gw->vid)); + batadv_print_vid(backbone_gw->vid)); vid_str[sizeof(vid_str) - 1] = 0; batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT, @@ -510,7 +510,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", - orig, BATADV_PRINT_VID(vid)); + orig, batadv_print_vid(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -719,7 +719,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, BATADV_PRINT_VID(vid)); + mac, batadv_print_vid(vid)); kref_get(&claim->refcount); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, @@ -739,8 +739,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, goto claim_free_ref; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_add_claim(): changing ownership for %pM, vid %d\n", - mac, BATADV_PRINT_VID(vid)); + "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n", + mac, batadv_print_vid(vid), backbone_gw->orig); remove_crc = true; } @@ -809,7 +809,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, return; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", - mac, BATADV_PRINT_VID(vid)); + mac, batadv_print_vid(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); @@ -849,7 +849,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - BATADV_PRINT_VID(vid), backbone_gw->orig, crc); + batadv_print_vid(vid), backbone_gw->orig, crc); spin_lock_bh(&backbone_gw->crc_lock); backbone_crc = backbone_gw->crc; @@ -859,7 +859,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", backbone_gw->orig, - BATADV_PRINT_VID(backbone_gw->vid), + batadv_print_vid(backbone_gw->vid), backbone_crc, crc); batadv_bla_send_request(backbone_gw); @@ -904,7 +904,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", - BATADV_PRINT_VID(vid), ethhdr->h_source); + batadv_print_vid(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return true; @@ -941,7 +941,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", - claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); + claim_addr, batadv_print_vid(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_put(backbone_gw); @@ -1161,7 +1161,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, + ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst); if (ret < 2) @@ -1197,7 +1197,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); + ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst); return true; } @@ -1295,10 +1295,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, goto skip; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_purge_claims(): %pM, vid %d, time out\n", - claim->addr, claim->vid); + "bla_purge_claims(): timed out.\n"); purge_now: + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "bla_purge_claims(): %pM, vid %d\n", + claim->addr, claim->vid); + batadv_handle_unclaim(bat_priv, primary_if, backbone_gw->orig, claim->addr, claim->vid); @@ -1846,6 +1849,13 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, /* possible optimization: race for a claim */ /* No claim exists yet, claim it for us! */ + + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n", + ethhdr->h_source, + batadv_is_my_client(bat_priv, + ethhdr->h_source, vid) ? + "yes" : "no"); batadv_handle_claim(bat_priv, primary_if, primary_if->net_dev->dev_addr, ethhdr->h_source, vid); @@ -1963,10 +1973,22 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, /* if yes, the client has roamed and we have * to unclaim it. */ - batadv_handle_unclaim(bat_priv, primary_if, - primary_if->net_dev->dev_addr, - ethhdr->h_source, vid); - goto allow; + if (batadv_has_timed_out(claim->lasttime, 100)) { + /* only unclaim if the last claim entry is + * older than 100 ms to make sure we really + * have a roaming client here. + */ + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n", + ethhdr->h_source); + batadv_handle_unclaim(bat_priv, primary_if, + primary_if->net_dev->dev_addr, + ethhdr->h_source, vid); + goto allow; + } else { + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n", + ethhdr->h_source); + goto handled; + } } /* check if it is a multicast/broadcast frame */ @@ -2042,7 +2064,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) backbone_crc = backbone_gw->crc; spin_unlock_bh(&backbone_gw->crc_lock); seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", - claim->addr, BATADV_PRINT_VID(claim->vid), + claim->addr, batadv_print_vid(claim->vid), backbone_gw->orig, (is_own ? 'x' : ' '), backbone_crc); @@ -2274,7 +2296,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", backbone_gw->orig, - BATADV_PRINT_VID(backbone_gw->vid), secs, + batadv_print_vid(backbone_gw->vid), secs, msecs, backbone_crc); } rcu_read_unlock(); @@ -2449,3 +2471,52 @@ out: return ret; } + +#ifdef CONFIG_BATMAN_ADV_DAT +/** + * batadv_bla_check_claim - check if address is claimed + * + * @bat_priv: the bat priv with all the soft interface information + * @addr: mac address of which the claim status is checked + * @vid: the VLAN ID + * + * addr is checked if this address is claimed by the local device itself. + * + * Return: true if bla is disabled or the mac is claimed by the device, + * false if the device addr is already claimed by another gateway + */ +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, + u8 *addr, unsigned short vid) +{ + struct batadv_bla_claim search_claim; + struct batadv_bla_claim *claim = NULL; + struct batadv_hard_iface *primary_if = NULL; + bool ret = true; + + if (!atomic_read(&bat_priv->bridge_loop_avoidance)) + return ret; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + return ret; + + /* First look if the mac address is claimed */ + ether_addr_copy(search_claim.addr, addr); + search_claim.vid = vid; + + claim = batadv_claim_hash_find(bat_priv, &search_claim); + + /* If there is a claim and we are not owner of the claim, + * return false. + */ + if (claim) { + if (!batadv_compare_eth(claim->backbone_gw->orig, + primary_if->net_dev->dev_addr)) + ret = false; + batadv_claim_put(claim); + } + + batadv_hardif_put(primary_if); + return ret; +} +#endif diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index e157986bd01c..234775748b8e 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -69,6 +69,10 @@ void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); +#ifdef CONFIG_BATMAN_ADV_DAT +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, + unsigned short vid); +#endif #define BATADV_BLA_CRC_INIT 0 #else /* ifdef CONFIG_BATMAN_ADV_BLA */ @@ -145,6 +149,13 @@ static inline int batadv_bla_backbone_dump(struct sk_buff *msg, return -EOPNOTSUPP; } +static inline +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, + unsigned short vid) +{ + return true; +} + #endif /* ifdef CONFIG_BATMAN_ADV_BLA */ #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */ diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 1bfd1dbc2feb..013e970eff39 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -43,6 +43,7 @@ #include <linux/workqueue.h> #include <net/arp.h> +#include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" @@ -330,7 +331,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, batadv_dbg(BATADV_DBG_DAT, bat_priv, "Entry updated: %pI4 %pM (vid: %d)\n", &dat_entry->ip, dat_entry->mac_addr, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); goto out; } @@ -356,7 +357,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, } batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM (vid: %d)\n", - &dat_entry->ip, dat_entry->mac_addr, BATADV_PRINT_VID(vid)); + &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid)); out: if (dat_entry) @@ -835,7 +836,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n", &dat_entry->ip, dat_entry->mac_addr, - BATADV_PRINT_VID(dat_entry->vid), + batadv_print_vid(dat_entry->vid), last_seen_mins, last_seen_secs); } rcu_read_unlock(); @@ -1002,6 +1003,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; + struct net_device *soft_iface = bat_priv->soft_iface; int hdr_size = 0; unsigned short vid; @@ -1040,16 +1042,30 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, goto out; } + /* If BLA is enabled, only send ARP replies if we have claimed + * the destination for the ARP request or if no one else of + * the backbone gws belonging to our backbone has claimed the + * destination. + */ + if (!batadv_bla_check_claim(bat_priv, + dat_entry->mac_addr, vid)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Device %pM claimed by another backbone gw. Don't send ARP reply!", + dat_entry->mac_addr); + ret = true; + goto out; + } + skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src, dat_entry->mac_addr, hw_src, vid); if (!skb_new) goto out; - skb_new->protocol = eth_type_trans(skb_new, - bat_priv->soft_iface); - bat_priv->stats.rx_packets++; - bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size; + skb_new->protocol = eth_type_trans(skb_new, soft_iface); + + soft_iface->stats.rx_packets++; + soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size; netif_rx(skb_new); batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); @@ -1188,6 +1204,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) { + struct batadv_dat_entry *dat_entry = NULL; u16 type; __be32 ip_src, ip_dst; u8 *hw_src, *hw_dst; @@ -1210,12 +1227,41 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, hw_dst = batadv_arp_hw_dst(skb, hdr_size); ip_dst = batadv_arp_ip_dst(skb, hdr_size); + /* If ip_dst is already in cache and has the right mac address, + * drop this frame if this ARP reply is destined for us because it's + * most probably an ARP reply generated by another node of the DHT. + * We have most probably received already a reply earlier. Delivering + * this frame would lead to doubled receive of an ARP reply. + */ + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_src, vid); + if (dat_entry && batadv_compare_eth(hw_src, dat_entry->mac_addr)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, "Doubled ARP reply removed: ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]; dat_entry: %pM-%pI4\n", + hw_src, &ip_src, hw_dst, &ip_dst, + dat_entry->mac_addr, &dat_entry->ip); + dropped = true; + goto out; + } + /* Update our internal cache with both the IP addresses the node got * within the ARP reply */ batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid); + /* If BLA is enabled, only forward ARP replies if we have claimed the + * source of the ARP reply or if no one else of the same backbone has + * already claimed that client. This prevents that different gateways + * to the same backbone all forward the ARP reply leading to multiple + * replies in the backbone. + */ + if (!batadv_bla_check_claim(bat_priv, hw_src, vid)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Device %pM claimed by another backbone gw. Drop ARP reply.\n", + hw_src); + dropped = true; + goto out; + } + /* if this REPLY is directed to a client of mine, let's deliver the * packet to the interface */ @@ -1228,6 +1274,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, out: if (dropped) kfree_skb(skb); + if (dat_entry) + batadv_dat_entry_put(dat_entry); /* if dropped == false -> deliver to the interface */ return dropped; } @@ -1256,7 +1304,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, /* If this packet is an ARP_REQUEST and the node already has the * information that it is going to ask, then the packet can be dropped */ - if (forw_packet->num_packets) + if (batadv_forw_packet_is_rebroadcast(forw_packet)) goto out; vid = batadv_dat_get_vid(forw_packet->skb, &hdr_size); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 11a23fd6e1a0..8f964beaac28 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -404,7 +404,7 @@ out: * batadv_frag_create - create a fragment from skb * @skb: skb to create fragment from * @frag_head: header to use in new fragment - * @mtu: size of new fragment + * @fragment_size: size of new fragment * * Split the passed skb into two fragments: A new one with size matching the * passed mtu and the old one with the rest. The new skb contains data from the @@ -414,11 +414,11 @@ out: */ static struct sk_buff *batadv_frag_create(struct sk_buff *skb, struct batadv_frag_packet *frag_head, - unsigned int mtu) + unsigned int fragment_size) { struct sk_buff *skb_fragment; unsigned int header_size = sizeof(*frag_head); - unsigned int fragment_size = mtu - header_size; + unsigned int mtu = fragment_size + header_size; skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); if (!skb_fragment) @@ -456,7 +456,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct sk_buff *skb_fragment; unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; unsigned int header_size = sizeof(frag_header); - unsigned int max_fragment_size, max_packet_size; + unsigned int max_fragment_size, num_fragments; int ret; /* To avoid merge and refragmentation at next-hops we never send @@ -464,10 +464,15 @@ int batadv_frag_send_packet(struct sk_buff *skb, */ mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE); max_fragment_size = mtu - header_size; - max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; + + if (skb->len == 0 || max_fragment_size == 0) + return -EINVAL; + + num_fragments = (skb->len - 1) / max_fragment_size + 1; + max_fragment_size = (skb->len - 1) / num_fragments + 1; /* Don't even try to fragment, if we need more than 16 fragments */ - if (skb->len > max_packet_size) { + if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) { ret = -EAGAIN; goto free_skb; } @@ -507,7 +512,8 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto put_primary_if; } - skb_fragment = batadv_frag_create(skb, &frag_header, mtu); + skb_fragment = batadv_frag_create(skb, &frag_header, + max_fragment_size); if (!skb_fragment) { ret = -ENOMEM; goto put_primary_if; diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 5db2e43e3775..33940c5c74a8 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -253,6 +253,11 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, */ void batadv_gw_init(struct batadv_priv *bat_priv) { + if (bat_priv->algo_ops->gw.init_sel_class) + bat_priv->algo_ops->gw.init_sel_class(bat_priv); + else + atomic_set(&bat_priv->gw.sel_class, 1); + batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1, NULL, BATADV_TVLV_GW, 1, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 7a2b9f4da078..65ce97efa6b5 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -73,9 +73,10 @@ __printf(2, 3); /* possibly ratelimited debug output */ #define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ do { \ - if (atomic_read(&(bat_priv)->log_level) & (type) && \ + struct batadv_priv *__batpriv = (bat_priv); \ + if (atomic_read(&__batpriv->log_level) & (type) && \ (!(ratelimited) || net_ratelimit())) \ - batadv_debug_log(bat_priv, fmt, ## arg); \ + batadv_debug_log(__batpriv, fmt, ## arg); \ } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5000c540614d..fb381fb26a66 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -516,6 +516,9 @@ static void batadv_recv_handler_init(void) BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12); BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8); + i = FIELD_SIZEOF(struct sk_buff, cb); + BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i); + /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 57a8103dbce7..810f7d026f54 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.0" +#define BATADV_SOURCE_VERSION "2017.1" #endif /* B.A.T.M.A.N. parameters */ @@ -193,6 +193,7 @@ enum batadv_uev_type { #include <linux/percpu.h> #include <linux/types.h> +#include "packet.h" #include "types.h" struct net_device; @@ -200,8 +201,19 @@ struct packet_type; struct seq_file; struct sk_buff; -#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \ - (int)((vid) & VLAN_VID_MASK) : -1) +/** + * batadv_print_vid - return printable version of vid information + * @vid: the VLAN identifier + * + * Return: -1 when no VLAN is used, VLAN id otherwise + */ +static inline int batadv_print_vid(unsigned short vid) +{ + if (vid & BATADV_VLAN_HAS_TAG) + return (int)(vid & VLAN_VID_MASK); + else + return -1; +} extern struct list_head batadv_hardif_list; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 952ba81a565b..d327670641ac 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -494,9 +494,8 @@ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) if (!bridged) goto update; -#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) - pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); -#endif + if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)) + pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP); querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP); @@ -671,7 +670,6 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, return 0; } -#if IS_ENABLED(CONFIG_IPV6) /** * batadv_mcast_is_report_ipv6 - check for MLD reports * @skb: the ethernet frame destined for the mesh @@ -736,7 +734,6 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, return 0; } -#endif /** * batadv_mcast_forw_mode_check - check for optimized forwarding potential @@ -765,11 +762,12 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, case ETH_P_IP: return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb, is_unsnoopable); -#if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: + if (!IS_ENABLED(CONFIG_IPV6)) + return -EINVAL; + return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb, is_unsnoopable); -#endif default: return -EINVAL; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7fd740b6e36d..e1ebe14ee2a6 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -941,15 +941,17 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_unicast_packet *unicast_packet; struct batadv_unicast_4addr_packet *unicast_4addr_packet; - u8 *orig_addr; - struct batadv_orig_node *orig_node = NULL; + u8 *orig_addr, *orig_addr_gw; + struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - bool is4addr; + struct ethhdr *ethhdr; int ret = NET_RX_DROP; + bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + ethhdr = eth_hdr(skb); is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ @@ -972,6 +974,23 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { + /* If this is a unicast packet from another backgone gw, + * drop it. + */ + orig_addr_gw = ethhdr->h_source; + orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); + if (orig_node_gw) { + is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, + hdr_size); + batadv_orig_node_put(orig_node_gw); + if (is_gw) { + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n", + orig_addr_gw); + return NET_RX_DROP; + } + } + if (is4addr) { subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 1489ec27daff..403df596a73d 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -482,6 +482,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, * @if_outgoing: The (optional) if_outgoing to be grabbed * @queue_left: The (optional) queue counter to decrease * @bat_priv: The bat_priv for the mesh of this forw_packet + * @skb: The raw packet this forwarding packet shall contain * * Allocates a forwarding packet and tries to get a reference to the * (optional) if_incoming, if_outgoing and queue_left. If queue_left @@ -493,7 +494,8 @@ struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, atomic_t *queue_left, - struct batadv_priv *bat_priv) + struct batadv_priv *bat_priv, + struct sk_buff *skb) { struct batadv_forw_packet *forw_packet; const char *qname; @@ -525,7 +527,7 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, INIT_HLIST_NODE(&forw_packet->list); INIT_HLIST_NODE(&forw_packet->cleanup_list); - forw_packet->skb = NULL; + forw_packet->skb = skb; forw_packet->queue_left = queue_left; forw_packet->if_incoming = if_incoming; forw_packet->if_outgoing = if_outgoing; @@ -756,22 +758,23 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, if (!primary_if) goto err; + newskb = skb_copy(skb, GFP_ATOMIC); + if (!newskb) { + batadv_hardif_put(primary_if); + goto err; + } + forw_packet = batadv_forw_packet_alloc(primary_if, NULL, &bat_priv->bcast_queue_left, - bat_priv); + bat_priv, newskb); batadv_hardif_put(primary_if); if (!forw_packet) - goto err; - - newskb = skb_copy(skb, GFP_ATOMIC); - if (!newskb) goto err_packet_free; /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; bcast_packet->ttl--; - forw_packet->skb = newskb; forw_packet->own = own_packet; INIT_DELAYED_WORK(&forw_packet->delayed_work, @@ -781,11 +784,60 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, return NETDEV_TX_OK; err_packet_free: - batadv_forw_packet_free(forw_packet, true); + kfree_skb(newskb); err: return NETDEV_TX_BUSY; } +/** + * batadv_forw_packet_bcasts_left - check if a retransmission is necessary + * @forw_packet: the forwarding packet to check + * @hard_iface: the interface to check on + * + * Checks whether a given packet has any (re)transmissions left on the provided + * interface. + * + * hard_iface may be NULL: In that case the number of transmissions this skb had + * so far is compared with the maximum amount of retransmissions independent of + * any interface instead. + * + * Return: True if (re)transmissions are left, false otherwise. + */ +static bool +batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet, + struct batadv_hard_iface *hard_iface) +{ + unsigned int max; + + if (hard_iface) + max = hard_iface->num_bcasts; + else + max = BATADV_NUM_BCASTS_MAX; + + return BATADV_SKB_CB(forw_packet->skb)->num_bcasts < max; +} + +/** + * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet + * @forw_packet: the packet to increase the counter for + */ +static void +batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet) +{ + BATADV_SKB_CB(forw_packet->skb)->num_bcasts++; +} + +/** + * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions + * @forw_packet: the packet to check + * + * Return: True if this packet was transmitted before, false otherwise. + */ +bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet) +{ + return BATADV_SKB_CB(forw_packet->skb)->num_bcasts > 0; +} + static void batadv_send_outstanding_bcast_packet(struct work_struct *work) { struct batadv_hard_iface *hard_iface; @@ -826,7 +878,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (hard_iface->soft_iface != soft_iface) continue; - if (forw_packet->num_packets >= hard_iface->num_bcasts) + if (!batadv_forw_packet_bcasts_left(forw_packet, hard_iface)) continue; if (forw_packet->own) { @@ -884,10 +936,10 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) } rcu_read_unlock(); - forw_packet->num_packets++; + batadv_forw_packet_bcasts_inc(forw_packet); /* if we still have some more bcasts to send */ - if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { + if (batadv_forw_packet_bcasts_left(forw_packet, NULL)) { batadv_forw_packet_bcast_queue(bat_priv, forw_packet, send_time); return; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index f21166d10323..a16b34f473ef 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -34,11 +34,13 @@ struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, atomic_t *queue_left, - struct batadv_priv *bat_priv); + struct batadv_priv *bat_priv, + struct sk_buff *skb); bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l); void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time); +bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet); int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5d099b2e6cfc..b25789abf7b9 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -64,28 +64,6 @@ #include "sysfs.h" #include "translation-table.h" -static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); -static void batadv_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info); -static u32 batadv_get_msglevel(struct net_device *dev); -static void batadv_set_msglevel(struct net_device *dev, u32 value); -static u32 batadv_get_link(struct net_device *dev); -static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data); -static void batadv_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data); -static int batadv_get_sset_count(struct net_device *dev, int stringset); - -static const struct ethtool_ops batadv_ethtool_ops = { - .get_settings = batadv_get_settings, - .get_drvinfo = batadv_get_drvinfo, - .get_msglevel = batadv_get_msglevel, - .set_msglevel = batadv_set_msglevel, - .get_link = batadv_get_link, - .get_strings = batadv_get_strings, - .get_ethtool_stats = batadv_get_ethtool_stats, - .get_sset_count = batadv_get_sset_count, -}; - int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) { int result; @@ -140,7 +118,7 @@ static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) static struct net_device_stats *batadv_interface_stats(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct net_device_stats *stats = &bat_priv->stats; + struct net_device_stats *stats = &dev->stats; stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX); stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES); @@ -230,6 +208,9 @@ static int batadv_interface_tx(struct sk_buff *skb, if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; + /* reset control block to avoid left overs from previous users */ + memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); + netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); @@ -819,7 +800,6 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); #endif atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); - atomic_set(&bat_priv->gw.sel_class, 20); atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); @@ -948,6 +928,98 @@ static const struct net_device_ops batadv_netdev_ops = { .ndo_del_slave = batadv_softif_slave_del, }; +static void batadv_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); + strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); +} + +/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 + * Declare each description string in struct.name[] to get fixed sized buffer + * and compile time checking for strings longer than ETH_GSTRING_LEN. + */ +static const struct { + const char name[ETH_GSTRING_LEN]; +} batadv_counters_strings[] = { + { "tx" }, + { "tx_bytes" }, + { "tx_dropped" }, + { "rx" }, + { "rx_bytes" }, + { "forward" }, + { "forward_bytes" }, + { "mgmt_tx" }, + { "mgmt_tx_bytes" }, + { "mgmt_rx" }, + { "mgmt_rx_bytes" }, + { "frag_tx" }, + { "frag_tx_bytes" }, + { "frag_rx" }, + { "frag_rx_bytes" }, + { "frag_fwd" }, + { "frag_fwd_bytes" }, + { "tt_request_tx" }, + { "tt_request_rx" }, + { "tt_response_tx" }, + { "tt_response_rx" }, + { "tt_roam_adv_tx" }, + { "tt_roam_adv_rx" }, +#ifdef CONFIG_BATMAN_ADV_DAT + { "dat_get_tx" }, + { "dat_get_rx" }, + { "dat_put_tx" }, + { "dat_put_rx" }, + { "dat_cached_reply_tx" }, +#endif +#ifdef CONFIG_BATMAN_ADV_NC + { "nc_code" }, + { "nc_code_bytes" }, + { "nc_recode" }, + { "nc_recode_bytes" }, + { "nc_buffer" }, + { "nc_decode" }, + { "nc_decode_bytes" }, + { "nc_decode_failed" }, + { "nc_sniffed" }, +#endif +}; + +static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, batadv_counters_strings, + sizeof(batadv_counters_strings)); +} + +static void batadv_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + int i; + + for (i = 0; i < BATADV_CNT_NUM; i++) + data[i] = batadv_sum_counter(bat_priv, i); +} + +static int batadv_get_sset_count(struct net_device *dev, int stringset) +{ + if (stringset == ETH_SS_STATS) + return BATADV_CNT_NUM; + + return -EOPNOTSUPP; +} + +static const struct ethtool_ops batadv_ethtool_ops = { + .get_drvinfo = batadv_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = batadv_get_strings, + .get_ethtool_stats = batadv_get_ethtool_stats, + .get_sset_count = batadv_get_sset_count, +}; + /** * batadv_softif_free - Deconstructor of batadv_soft_interface * @dev: Device to cleanup and remove @@ -972,8 +1044,6 @@ static void batadv_softif_free(struct net_device *dev) */ static void batadv_softif_init_early(struct net_device *dev) { - struct batadv_priv *priv = netdev_priv(dev); - ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; @@ -990,8 +1060,6 @@ static void batadv_softif_init_early(struct net_device *dev) eth_hw_addr_random(dev); dev->ethtool_ops = &batadv_ethtool_ops; - - memset(priv, 0, sizeof(*priv)); } struct net_device *batadv_softif_create(struct net *net, const char *name) @@ -1084,118 +1152,3 @@ struct rtnl_link_ops batadv_link_ops __read_mostly = { .setup = batadv_softif_init_early, .dellink = batadv_softif_destroy_netlink, }; - -/* ethtool */ -static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - cmd->supported = 0; - cmd->advertising = 0; - ethtool_cmd_speed_set(cmd, SPEED_10); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_TP; - cmd->phy_address = 0; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_DISABLE; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; - - return 0; -} - -static void batadv_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); - strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); - strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); - strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); -} - -static u32 batadv_get_msglevel(struct net_device *dev) -{ - return -EOPNOTSUPP; -} - -static void batadv_set_msglevel(struct net_device *dev, u32 value) -{ -} - -static u32 batadv_get_link(struct net_device *dev) -{ - return 1; -} - -/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 - * Declare each description string in struct.name[] to get fixed sized buffer - * and compile time checking for strings longer than ETH_GSTRING_LEN. - */ -static const struct { - const char name[ETH_GSTRING_LEN]; -} batadv_counters_strings[] = { - { "tx" }, - { "tx_bytes" }, - { "tx_dropped" }, - { "rx" }, - { "rx_bytes" }, - { "forward" }, - { "forward_bytes" }, - { "mgmt_tx" }, - { "mgmt_tx_bytes" }, - { "mgmt_rx" }, - { "mgmt_rx_bytes" }, - { "frag_tx" }, - { "frag_tx_bytes" }, - { "frag_rx" }, - { "frag_rx_bytes" }, - { "frag_fwd" }, - { "frag_fwd_bytes" }, - { "tt_request_tx" }, - { "tt_request_rx" }, - { "tt_response_tx" }, - { "tt_response_rx" }, - { "tt_roam_adv_tx" }, - { "tt_roam_adv_rx" }, -#ifdef CONFIG_BATMAN_ADV_DAT - { "dat_get_tx" }, - { "dat_get_rx" }, - { "dat_put_tx" }, - { "dat_put_rx" }, - { "dat_cached_reply_tx" }, -#endif -#ifdef CONFIG_BATMAN_ADV_NC - { "nc_code" }, - { "nc_code_bytes" }, - { "nc_recode" }, - { "nc_recode_bytes" }, - { "nc_buffer" }, - { "nc_decode" }, - { "nc_decode_bytes" }, - { "nc_decode_failed" }, - { "nc_sniffed" }, -#endif -}; - -static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) -{ - if (stringset == ETH_SS_STATS) - memcpy(data, batadv_counters_strings, - sizeof(batadv_counters_strings)); -} - -static void batadv_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct batadv_priv *bat_priv = netdev_priv(dev); - int i; - - for (i = 0; i < BATADV_CNT_NUM; i++) - data[i] = batadv_sum_counter(bat_priv, i); -} - -static int batadv_get_sset_count(struct net_device *dev, int stringset) -{ - if (stringset == ETH_SS_STATS) - return BATADV_CNT_NUM; - - return -EOPNOTSUPP; -} diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index c94ebdecdc3d..556f9a865ddf 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -873,8 +873,8 @@ static int batadv_tp_send(void *arg) /* something went wrong during the preparation/transmission */ if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: batadv_tp_send() cannot send packets (%d)\n", - err); + "Meter: %s() cannot send packets (%d)\n", + __func__, err); /* ensure nobody else tries to stop the thread now */ if (atomic_dec_and_test(&tp_vars->sending)) tp_vars->reason = err; @@ -979,7 +979,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, if (!tp_vars) { spin_unlock_bh(&bat_priv->tp_list_lock); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: batadv_tp_start cannot allocate list elements\n"); + "Meter: %s cannot allocate list elements\n", + __func__); batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR, dst, bat_priv, session_cookie); return; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 6077a87d46f0..e75b4937b497 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -617,7 +617,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(tt_global->common.vid), message); + batadv_print_vid(tt_global->common.vid), message); batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_tt, &tt_global->common); @@ -671,7 +671,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Re-adding pending client %pM (vid: %d)\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); /* whatever the reason why the PENDING flag was set, * this is a client which was enqueued to be removed in * this orig_interval. Since it popped up again, the @@ -684,7 +684,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Roaming client %pM (vid: %d) came back to its original location\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); /* the ROAM flag is set because this client roamed away * and the node got a roaming_advertisement message. Now * that the client popped up again at its original @@ -716,7 +716,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (!vlan) { net_ratelimited_function(batadv_info, soft_iface, "adding TT local entry %pM to non-existent VLAN %d\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); kmem_cache_free(batadv_tl_cache, tt_local); tt_local = NULL; goto out; @@ -724,7 +724,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", - addr, BATADV_PRINT_VID(vid), + addr, batadv_print_vid(vid), (u8)atomic_read(&bat_priv->tt.vn)); ether_addr_copy(tt_local->common.addr, addr); @@ -1097,7 +1097,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), ((tt_common_entry->flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), no_purge ? 'P' : '.', @@ -1296,7 +1296,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Local tt entry (%pM, vid: %d) pending to be removed: %s\n", tt_local_entry->common.addr, - BATADV_PRINT_VID(tt_local_entry->common.vid), message); + batadv_print_vid(tt_local_entry->common.vid), message); } /** @@ -1727,7 +1727,7 @@ add_orig_entry: batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (vid: %d, via %pM)\n", - common->addr, BATADV_PRINT_VID(common->vid), + common->addr, batadv_print_vid(common->vid), orig_node->orig); ret = true; @@ -1835,7 +1835,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, if (!vlan) { seq_printf(seq, " * Cannot retrieve VLAN %d for originator %pM\n", - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), best_entry->orig_node->orig); goto print_list; } @@ -1844,7 +1844,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, seq_printf(seq, " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '*', tt_global_entry->common.addr, - BATADV_PRINT_VID(tt_global_entry->common.vid), + batadv_print_vid(tt_global_entry->common.vid), best_entry->ttvn, best_entry->orig_node->orig, last_ttvn, vlan->tt.crc, ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), @@ -1867,7 +1867,7 @@ print_list: if (!vlan) { seq_printf(seq, " + Cannot retrieve VLAN %d for originator %pM\n", - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), orig_entry->orig_node->orig); continue; } @@ -1876,7 +1876,7 @@ print_list: seq_printf(seq, " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '+', tt_global_entry->common.addr, - BATADV_PRINT_VID(tt_global_entry->common.vid), + batadv_print_vid(tt_global_entry->common.vid), orig_entry->ttvn, orig_entry->orig_node->orig, last_ttvn, vlan->tt.crc, ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), @@ -2213,7 +2213,7 @@ batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv, "Deleting %pM from global tt entry %pM (vid: %d): %s\n", orig_node->orig, tt_global_entry->common.addr, - BATADV_PRINT_VID(vid), message); + batadv_print_vid(vid), message); _batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry); } @@ -2253,12 +2253,13 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, /* its the last one, mark for roaming. */ tt_global_entry->common.flags |= BATADV_TT_CLIENT_ROAM; tt_global_entry->roam_at = jiffies; - } else + } else { /* there is another entry, we can simply delete this * one and can still use the other one. */ batadv_tt_global_del_orig_node(bat_priv, tt_global_entry, orig_node, message); + } } /** @@ -2314,10 +2315,11 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, /* local entry exists, case 2: client roamed to us. */ batadv_tt_global_del_orig_list(tt_global_entry); batadv_tt_global_free(bat_priv, tt_global_entry, message); - } else + } else { /* no local entry exists, case 1: check for roaming */ batadv_tt_global_del_roaming(bat_priv, tt_global_entry, orig_node, message); + } out: if (tt_global_entry) @@ -2375,7 +2377,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(vid), message); + batadv_print_vid(vid), message); hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_put(tt_global); } @@ -2435,7 +2437,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(tt_global->common.vid), + batadv_print_vid(tt_global->common.vid), msg); hlist_del_rcu(&tt_common->hash_entry); @@ -3650,7 +3652,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n", - orig_node->orig, client, BATADV_PRINT_VID(vid)); + orig_node->orig, client, batadv_print_vid(vid)); batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); @@ -3773,7 +3775,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting local tt entry (%pM, vid: %d): pending\n", tt_common->addr, - BATADV_PRINT_VID(tt_common->vid)); + batadv_print_vid(tt_common->vid)); batadv_tt_local_size_dec(bat_priv, tt_common->vid); hlist_del_rcu(&tt_common->hash_entry); @@ -4017,7 +4019,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n", - addr, BATADV_PRINT_VID(vid), orig_node->orig); + addr, batadv_print_vid(vid), orig_node->orig); ret = true; out: return ret; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 66b25e410a41..ea43a6449247 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1000,7 +1000,6 @@ struct batadv_priv_bat_v { * struct batadv_priv - per mesh interface data * @mesh_state: current status of the mesh (inactive/active/deactivating) * @soft_iface: net device which holds this struct as private data - * @stats: structure holding the data for the ndo_get_stats() call * @bat_counters: mesh internal traffic statistic counters (see batadv_counters) * @aggregated_ogms: bool indicating whether OGM aggregation is enabled * @bonding: bool indicating whether traffic bonding is enabled @@ -1055,7 +1054,6 @@ struct batadv_priv_bat_v { struct batadv_priv { atomic_t mesh_state; struct net_device *soft_iface; - struct net_device_stats stats; u64 __percpu *bat_counters; /* Per cpu counters */ atomic_t aggregated_ogms; atomic_t bonding; @@ -1377,9 +1375,11 @@ struct batadv_nc_packet { * relevant to batman-adv in the skb->cb buffer in skbs. * @decoded: Marks a skb as decoded, which is checked when searching for coding * opportunities in network-coding.c + * @num_bcasts: Counter for broadcast packet retransmissions */ struct batadv_skb_cb { bool decoded; + unsigned int num_bcasts; }; /** @@ -1392,7 +1392,7 @@ struct batadv_skb_cb { * @skb: bcast packet's skb buffer * @packet_len: size of aggregated OGM packet inside the skb buffer * @direct_link_flags: direct link flags for aggregated OGM packets - * @num_packets: counter for bcast packet retransmission + * @num_packets: counter for aggregated OGMv1 packets * @delayed_work: work queue callback item for packet sending * @if_incoming: pointer to incoming hard-iface or primary iface if * locally generated packet @@ -1489,6 +1489,7 @@ struct batadv_algo_orig_ops { /** * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) + * @init_sel_class: initialize GW selection class (optional) * @store_sel_class: parse and stores a new GW selection class (optional) * @show_sel_class: prints the current GW selection class (optional) * @get_best_gw_node: select the best GW from the list of available nodes @@ -1499,6 +1500,7 @@ struct batadv_algo_orig_ops { * @dump: dump gateways to a netlink socket (optional) */ struct batadv_algo_gw_ops { + void (*init_sel_class)(struct batadv_priv *bat_priv); ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, size_t count); ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); diff --git a/net/bpf/Makefile b/net/bpf/Makefile new file mode 100644 index 000000000000..27b2992a0692 --- /dev/null +++ b/net/bpf/Makefile @@ -0,0 +1 @@ +obj-y := test_run.o diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c new file mode 100644 index 000000000000..8a6d0a37c30c --- /dev/null +++ b/net/bpf/test_run.c @@ -0,0 +1,172 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/etherdevice.h> +#include <linux/filter.h> +#include <linux/sched/signal.h> + +static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) +{ + u32 ret; + + preempt_disable(); + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, ctx); + rcu_read_unlock(); + preempt_enable(); + + return ret; +} + +static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) +{ + u64 time_start, time_spent = 0; + u32 ret = 0, i; + + if (!repeat) + repeat = 1; + time_start = ktime_get_ns(); + for (i = 0; i < repeat; i++) { + ret = bpf_test_run_one(prog, ctx); + if (need_resched()) { + if (signal_pending(current)) + break; + time_spent += ktime_get_ns() - time_start; + cond_resched(); + time_start = ktime_get_ns(); + } + } + time_spent += ktime_get_ns() - time_start; + do_div(time_spent, repeat); + *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + + return ret; +} + +static int bpf_test_finish(union bpf_attr __user *uattr, const void *data, + u32 size, u32 retval, u32 duration) +{ + void __user *data_out = u64_to_user_ptr(uattr->test.data_out); + int err = -EFAULT; + + if (data_out && copy_to_user(data_out, data, size)) + goto out; + if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) + goto out; + if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) + goto out; + if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration))) + goto out; + err = 0; +out: + return err; +} + +static void *bpf_test_init(const union bpf_attr *kattr, u32 size, + u32 headroom, u32 tailroom) +{ + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + void *data; + + if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) + return ERR_PTR(-EINVAL); + + data = kzalloc(size + headroom + tailroom, GFP_USER); + if (!data) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(data + headroom, data_in, size)) { + kfree(data); + return ERR_PTR(-EFAULT); + } + return data; +} + +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + bool is_l2 = false, is_direct_pkt_access = false; + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + u32 retval, duration; + struct sk_buff *skb; + void *data; + int ret; + + data = bpf_test_init(kattr, size, NET_SKB_PAD, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + switch (prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + is_l2 = true; + /* fall through */ + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + is_direct_pkt_access = true; + break; + default: + break; + } + + skb = build_skb(data, 0); + if (!skb) { + kfree(data); + return -ENOMEM; + } + + skb_reserve(skb, NET_SKB_PAD); + __skb_put(skb, size); + skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev); + skb_reset_network_header(skb); + + if (is_l2) + __skb_push(skb, ETH_HLEN); + if (is_direct_pkt_access) + bpf_compute_data_end(skb); + retval = bpf_test_run(prog, skb, repeat, &duration); + if (!is_l2) + __skb_push(skb, ETH_HLEN); + size = skb->len; + /* bpf program can never convert linear skb to non-linear */ + if (WARN_ON_ONCE(skb_is_nonlinear(skb))) + size = skb_headlen(skb); + ret = bpf_test_finish(uattr, skb->data, size, retval, duration); + kfree_skb(skb); + return ret; +} + +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct xdp_buff xdp = {}; + u32 retval, duration; + void *data; + int ret; + + data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + xdp.data_hard_start = data; + xdp.data = data + XDP_PACKET_HEADROOM; + xdp.data_end = xdp.data + size; + + retval = bpf_test_run(prog, &xdp, repeat, &duration); + if (xdp.data != data + XDP_PACKET_HEADROOM) + size = xdp.data_end - xdp.data; + ret = bpf_test_finish(uattr, xdp.data, size, retval, duration); + kfree(data); + return ret; +} diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ea71513fca21..90f49a194249 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -119,6 +119,15 @@ static int br_dev_init(struct net_device *dev) return err; } +static void br_dev_uninit(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + br_multicast_uninit_stats(br); + br_vlan_flush(br); + free_percpu(br->stats); +} + static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -332,6 +341,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_init = br_dev_init, + .ndo_uninit = br_dev_uninit, .ndo_start_xmit = br_dev_xmit, .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, @@ -356,14 +366,6 @@ static const struct net_device_ops br_netdev_ops = { .ndo_features_check = passthru_features_check, }; -static void br_dev_free(struct net_device *dev) -{ - struct net_bridge *br = netdev_priv(dev); - - free_percpu(br->stats); - free_netdev(dev); -} - static struct device_type br_type = { .name = "bridge", }; @@ -376,7 +378,7 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = br_dev_free; + dev->destructor = free_netdev; dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4f598dc2d916..5a40a87c4f4f 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -106,7 +106,7 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; - WARN_ON_ONCE(!br_hash_lock_held(br)); + lockdep_assert_held_once(&br->hash_lock); rcu_read_lock(); fdb = fdb_find_rcu(head, addr, vid); @@ -594,6 +594,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->updated = now; if (unlikely(added_by_user)) fdb->added_by_user = 1; + /* Take over HW learned entry */ + if (unlikely(fdb->added_by_external_learn)) + fdb->added_by_external_learn = 0; if (unlikely(fdb_modified)) fdb_notify(br, fdb, RTM_NEWNEIGH); } @@ -854,6 +857,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, br_fdb_update(br, p, addr, vid, true); rcu_read_unlock(); local_bh_enable(); + } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { + err = br_fdb_external_learn_add(br, p, addr, vid); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm->ndm_state, diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8ac1770aa222..6d273ca0bf7c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -22,6 +22,7 @@ #include <linux/rtnetlink.h> #include <linux/if_ether.h> #include <linux/slab.h> +#include <net/dsa.h> #include <net/sock.h> #include <linux/if_vlan.h> #include <net/switchdev.h> @@ -311,7 +312,6 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) br_fdb_delete_by_port(br, NULL, 0, 1); - br_vlan_flush(br); br_multicast_dev_del(br); cancel_delayed_work_sync(&br->gc_work); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 056e6ac49d8f..993626a7fc3b 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -464,7 +464,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL, + NULL); if (err < 0) return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b760f2620abf..faa7261a992f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2031,8 +2031,6 @@ void br_multicast_dev_del(struct net_bridge *br) out: spin_unlock_bh(&br->multicast_lock); - - free_percpu(br->mcast_stats); } int br_multicast_set_router(struct net_bridge *br, unsigned long val) @@ -2531,6 +2529,11 @@ int br_multicast_init_stats(struct net_bridge *br) return 0; } +void br_multicast_uninit_stats(struct net_bridge *br) +{ + free_percpu(br->mcast_stats); +} + static void mcast_stats_add_dir(u64 *dst, u64 *src) { dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX]; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index fa87fbd62bb7..067cf0313449 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -706,18 +706,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; - unsigned int mtu_reserved; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + unsigned int mtu, mtu_reserved; mtu_reserved = nf_bridge_mtu_reduction(skb); + mtu = skb->dev->mtu; + + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) + mtu = nf_bridge->frag_max_size; - if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } - nf_bridge = nf_bridge_info_get(skb); - /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ @@ -995,13 +997,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, if (!elem) return okfn(net, sk, skb); - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, elem); - rcu_read_unlock(); if (ret == 1) ret = okfn(net, sk, skb); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a8f6acd23e30..650986473577 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -748,8 +748,8 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) if (p && protinfo) { if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, br_port_policy); + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, protinfo, + br_port_policy, NULL); if (err) return err; @@ -1165,11 +1165,14 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, spin_unlock_bh(&br->lock); } - err = br_changelink(dev, tb, data); + err = register_netdevice(dev); if (err) return err; - return register_netdevice(dev); + err = br_changelink(dev, tb, data); + if (err) + unregister_netdevice(dev); + return err; } static size_t br_get_size(const struct net_device *brdev) diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index c913491495ab..3712c7f0e00c 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -227,8 +227,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, memset(tinfo, 0, sizeof(*tinfo)); - err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, - attr, vlan_tunnel_policy); + err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr, + vlan_tunnel_policy, NULL); if (err < 0) return err; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2288fca7756c..0d177280aa84 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -531,15 +531,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); -static inline bool br_hash_lock_held(struct net_bridge *br) -{ -#ifdef CONFIG_LOCKDEP - return lockdep_is_held(&br->hash_lock); -#else - return true; -#endif -} - /* br_forward.c */ enum br_pkt_type { BR_PKT_UNICAST, @@ -629,6 +620,7 @@ void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir); int br_multicast_init_stats(struct net_bridge *br); +void br_multicast_uninit_stats(struct net_bridge *br); void br_multicast_get_stats(const struct net_bridge *br, const struct net_bridge_port *p, struct br_mcast_stats *dest); @@ -769,6 +761,10 @@ static inline int br_multicast_init_stats(struct net_bridge *br) return 0; } +static inline void br_multicast_uninit_stats(struct net_bridge *br) +{ +} + static inline int br_multicast_igmp_type(const struct sk_buff *skb) { return 0; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 98b9c8e8615e..707caea39743 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -62,10 +62,10 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) pptr = skb_header_pointer(skb, offset, sizeof(_ports), &_ports); if (pptr == NULL) { - printk(" INCOMPLETE TCP/UDP header"); + pr_cont(" INCOMPLETE TCP/UDP header"); return; } - printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); + pr_cont(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); } } @@ -100,11 +100,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) { - printk(" INCOMPLETE IP header"); + pr_cont(" INCOMPLETE IP header"); goto out; } - printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d", - &ih->saddr, &ih->daddr, ih->tos, ih->protocol); + pr_cont(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d", + &ih->saddr, &ih->daddr, ih->tos, ih->protocol); print_ports(skb, ih->protocol, ih->ihl*4); goto out; } @@ -120,11 +120,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) { - printk(" INCOMPLETE IPv6 header"); + pr_cont(" INCOMPLETE IPv6 header"); goto out; } - printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", - &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); + pr_cont(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", + &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); nexthdr = ih->nexthdr; offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off); if (offset_ph == -1) @@ -142,12 +142,12 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) { - printk(" INCOMPLETE ARP header"); + pr_cont(" INCOMPLETE ARP header"); goto out; } - printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d", - ntohs(ah->ar_hrd), ntohs(ah->ar_pro), - ntohs(ah->ar_op)); + pr_cont(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d", + ntohs(ah->ar_hrd), ntohs(ah->ar_pro), + ntohs(ah->ar_op)); /* If it's for Ethernet and the lengths are OK, * then log the ARP payload @@ -161,17 +161,17 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); if (ap == NULL) { - printk(" INCOMPLETE ARP payload"); + pr_cont(" INCOMPLETE ARP payload"); goto out; } - printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4", - ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); + pr_cont(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4", + ap->mac_src, ap->ip_src, + ap->mac_dst, ap->ip_dst); } } out: - printk("\n"); + pr_cont("\n"); spin_unlock_bh(&ebt_log_lock); - } static unsigned int diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 206dc266ecd2..346ef6b00b8f 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -375,11 +375,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code, err; - - err = nft_reject_bridge_validate(ctx, expr, NULL); - if (err < 0) - return err; + int icmp_code; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; diff --git a/net/can/af_can.c b/net/can/af_can.c index 5488e4a6ccd0..abf7d854a94d 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -75,9 +75,7 @@ static int stats_timer __read_mostly = 1; module_param(stats_timer, int, S_IRUGO); MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)"); -/* receive filters subscribed for 'all' CAN devices */ -struct dev_rcv_lists can_rx_alldev_list; -static DEFINE_SPINLOCK(can_rcvlists_lock); +static int can_net_id; static struct kmem_cache *rcv_cache __read_mostly; @@ -145,9 +143,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= CAN_NPROTO) return -EINVAL; - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - cp = can_get_proto(protocol); #ifdef CONFIG_MODULES @@ -331,10 +326,11 @@ EXPORT_SYMBOL(can_send); * af_can rx path */ -static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) +static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net, + struct net_device *dev) { if (!dev) - return &can_rx_alldev_list; + return net->can.can_rx_alldev_list; else return (struct dev_rcv_lists *)dev->ml_priv; } @@ -467,9 +463,9 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * -ENOMEM on missing cache mem to create subscription entry * -ENODEV unknown device */ -int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, - void (*func)(struct sk_buff *, void *), void *data, - char *ident, struct sock *sk) +int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, + canid_t mask, void (*func)(struct sk_buff *, void *), + void *data, char *ident, struct sock *sk) { struct receiver *r; struct hlist_head *rl; @@ -481,13 +477,16 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, if (dev && dev->type != ARPHRD_CAN) return -ENODEV; + if (dev && !net_eq(net, dev_net(dev))) + return -ENODEV; + r = kmem_cache_alloc(rcv_cache, GFP_KERNEL); if (!r) return -ENOMEM; - spin_lock(&can_rcvlists_lock); + spin_lock(&net->can.can_rcvlists_lock); - d = find_dev_rcv_lists(dev); + d = find_dev_rcv_lists(net, dev); if (d) { rl = find_rcv_list(&can_id, &mask, d); @@ -510,7 +509,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, err = -ENODEV; } - spin_unlock(&can_rcvlists_lock); + spin_unlock(&net->can.can_rcvlists_lock); return err; } @@ -540,8 +539,9 @@ static void can_rx_delete_receiver(struct rcu_head *rp) * Description: * Removes subscription entry depending on given (subscription) values. */ -void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, - void (*func)(struct sk_buff *, void *), void *data) +void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, + canid_t mask, void (*func)(struct sk_buff *, void *), + void *data) { struct receiver *r = NULL; struct hlist_head *rl; @@ -550,9 +550,12 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, if (dev && dev->type != ARPHRD_CAN) return; - spin_lock(&can_rcvlists_lock); + if (dev && !net_eq(net, dev_net(dev))) + return; - d = find_dev_rcv_lists(dev); + spin_lock(&net->can.can_rcvlists_lock); + + d = find_dev_rcv_lists(net, dev); if (!d) { pr_err("BUG: receive list not found for " "dev %s, id %03X, mask %03X\n", @@ -598,7 +601,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, } out: - spin_unlock(&can_rcvlists_lock); + spin_unlock(&net->can.can_rcvlists_lock); /* schedule the receiver item for deletion */ if (r) { @@ -696,10 +699,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ - matches = can_rcv_filter(&can_rx_alldev_list, skb); + matches = can_rcv_filter(dev_net(dev)->can.can_rx_alldev_list, skb); /* find receive list for this device */ - d = find_dev_rcv_lists(dev); + d = find_dev_rcv_lists(dev_net(dev), dev); if (d) matches += can_rcv_filter(d, skb); @@ -719,9 +722,6 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(!net_eq(dev_net(dev), &init_net))) - goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || cfd->len > CAN_MAX_DLEN, @@ -743,9 +743,6 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(!net_eq(dev_net(dev), &init_net))) - goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || cfd->len > CANFD_MAX_DLEN, @@ -835,9 +832,6 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dev_rcv_lists *d; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; @@ -855,7 +849,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, break; case NETDEV_UNREGISTER: - spin_lock(&can_rcvlists_lock); + spin_lock(&dev_net(dev)->can.can_rcvlists_lock); d = dev->ml_priv; if (d) { @@ -869,7 +863,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, pr_err("can: notifier: receive list not found for dev " "%s\n", dev->name); - spin_unlock(&can_rcvlists_lock); + spin_unlock(&dev_net(dev)->can.can_rcvlists_lock); break; } @@ -877,6 +871,40 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static int can_pernet_init(struct net *net) +{ + net->can.can_rcvlists_lock = + __SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock); + net->can.can_rx_alldev_list = + kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL); + + if (IS_ENABLED(CONFIG_PROC_FS)) + can_init_proc(net); + + return 0; +} + +static void can_pernet_exit(struct net *net) +{ + struct net_device *dev; + + if (IS_ENABLED(CONFIG_PROC_FS)) + can_remove_proc(net); + + /* remove created dev_rcv_lists from still registered CAN devices */ + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->type == ARPHRD_CAN && dev->ml_priv) { + struct dev_rcv_lists *d = dev->ml_priv; + + BUG_ON(d->entries); + kfree(d); + dev->ml_priv = NULL; + } + } + rcu_read_unlock(); +} + /* * af_can module init/exit functions */ @@ -902,6 +930,13 @@ static struct notifier_block can_netdev_notifier __read_mostly = { .notifier_call = can_notifier, }; +static struct pernet_operations can_pernet_ops __read_mostly = { + .init = can_pernet_init, + .exit = can_pernet_exit, + .id = &can_net_id, + .size = 0, +}; + static __init int can_init(void) { /* check for correct padding to be able to use the structs similarly */ @@ -912,8 +947,6 @@ static __init int can_init(void) pr_info("can: controller area network core (" CAN_VERSION_STRING ")\n"); - memset(&can_rx_alldev_list, 0, sizeof(can_rx_alldev_list)); - rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver), 0, 0, NULL); if (!rcv_cache) @@ -925,9 +958,10 @@ static __init int can_init(void) setup_timer(&can_stattimer, can_stat_update, 0); mod_timer(&can_stattimer, round_jiffies(jiffies + HZ)); } - can_init_proc(); } + register_pernet_subsys(&can_pernet_ops); + /* protocol register */ sock_register(&can_family_ops); register_netdevice_notifier(&can_netdev_notifier); @@ -939,13 +973,9 @@ static __init int can_init(void) static __exit void can_exit(void) { - struct net_device *dev; - if (IS_ENABLED(CONFIG_PROC_FS)) { if (stats_timer) del_timer_sync(&can_stattimer); - - can_remove_proc(); } /* protocol unregister */ @@ -954,19 +984,7 @@ static __exit void can_exit(void) unregister_netdevice_notifier(&can_netdev_notifier); sock_unregister(PF_CAN); - /* remove created dev_rcv_lists from still registered CAN devices */ - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) { - - struct dev_rcv_lists *d = dev->ml_priv; - - BUG_ON(d->entries); - kfree(d); - dev->ml_priv = NULL; - } - } - rcu_read_unlock(); + unregister_pernet_subsys(&can_pernet_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/can/af_can.h b/net/can/af_can.h index b86f5129e838..f273c9d9b129 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -114,8 +114,8 @@ struct s_pstats { extern struct dev_rcv_lists can_rx_alldev_list; /* function prototypes for the CAN networklayer procfs (proc.c) */ -void can_init_proc(void); -void can_remove_proc(void); +void can_init_proc(struct net *net); +void can_remove_proc(struct net *net); void can_stat_update(unsigned long data); /* structures and variables from af_can.c needed in proc.c for reading */ diff --git a/net/can/bcm.c b/net/can/bcm.c index 95d13b233c65..1976629a8463 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -764,8 +764,8 @@ static void bcm_remove_op(struct bcm_op *op) static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { - can_rx_unregister(dev, op->can_id, REGMASK(op->can_id), - bcm_rx_handler, op); + can_rx_unregister(&init_net, dev, op->can_id, + REGMASK(op->can_id), bcm_rx_handler, op); /* mark as removed subscription */ op->rx_reg_dev = NULL; @@ -808,7 +808,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, } } } else - can_rx_unregister(NULL, op->can_id, + can_rx_unregister(&init_net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); @@ -1222,7 +1222,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, dev = dev_get_by_index(&init_net, ifindex); if (dev) { - err = can_rx_register(dev, op->can_id, + err = can_rx_register(&init_net, dev, + op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); @@ -1232,7 +1233,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } } else - err = can_rx_register(NULL, op->can_id, + err = can_rx_register(&init_net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); if (err) { @@ -1528,7 +1529,7 @@ static int bcm_release(struct socket *sock) } } } else - can_rx_unregister(NULL, op->can_id, + can_rx_unregister(&init_net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); diff --git a/net/can/gw.c b/net/can/gw.c index 7056a1a2bb70..3b84fb7d98aa 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -440,14 +440,14 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) static inline int cgw_register_filter(struct cgw_job *gwj) { - return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, + return can_rx_register(&init_net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj, "gw", NULL); } static inline void cgw_unregister_filter(struct cgw_job *gwj) { - can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id, + can_rx_unregister(&init_net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); } @@ -641,7 +641,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, memset(mod, 0, sizeof(*mod)); err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, - cgw_policy); + cgw_policy, NULL); if (err < 0) return err; diff --git a/net/can/proc.c b/net/can/proc.c index 85ef7bb0f176..9a8d54d57b22 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -62,17 +62,6 @@ #define CAN_PROC_RCVLIST_EFF "rcvlist_eff" #define CAN_PROC_RCVLIST_ERR "rcvlist_err" -static struct proc_dir_entry *can_dir; -static struct proc_dir_entry *pde_version; -static struct proc_dir_entry *pde_stats; -static struct proc_dir_entry *pde_reset_stats; -static struct proc_dir_entry *pde_rcvlist_all; -static struct proc_dir_entry *pde_rcvlist_fil; -static struct proc_dir_entry *pde_rcvlist_inv; -static struct proc_dir_entry *pde_rcvlist_sff; -static struct proc_dir_entry *pde_rcvlist_eff; -static struct proc_dir_entry *pde_rcvlist_err; - static int user_reset; static const char rx_list_name[][8] = { @@ -351,20 +340,21 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, static int can_rcvlist_proc_show(struct seq_file *m, void *v) { /* double cast to prevent GCC warning */ - int idx = (int)(long)m->private; + int idx = (int)(long)PDE_DATA(m->file->f_inode); struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); rcu_read_lock(); /* receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_one(m, idx, NULL, d); /* receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv); } @@ -377,7 +367,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) static int can_rcvlist_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode)); + return single_open_net(inode, file, can_rcvlist_proc_show); } static const struct file_operations can_rcvlist_proc_fops = { @@ -417,6 +407,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; /* RX_SFF */ seq_puts(m, "\nreceive list 'rx_sff':\n"); @@ -424,11 +415,11 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* sff receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff)); /* sff receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { d = dev->ml_priv; can_rcvlist_proc_show_array(m, dev, d->rx_sff, @@ -444,7 +435,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_sff_proc_show, NULL); + return single_open_net(inode, file, can_rcvlist_sff_proc_show); } static const struct file_operations can_rcvlist_sff_proc_fops = { @@ -460,6 +451,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; /* RX_EFF */ seq_puts(m, "\nreceive list 'rx_eff':\n"); @@ -467,11 +459,11 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* eff receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff)); /* eff receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { d = dev->ml_priv; can_rcvlist_proc_show_array(m, dev, d->rx_eff, @@ -487,7 +479,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_eff_proc_show, NULL); + return single_open_net(inode, file, can_rcvlist_eff_proc_show); } static const struct file_operations can_rcvlist_eff_proc_fops = { @@ -499,81 +491,85 @@ static const struct file_operations can_rcvlist_eff_proc_fops = { }; /* - * proc utility functions - */ - -static void can_remove_proc_readentry(const char *name) -{ - if (can_dir) - remove_proc_entry(name, can_dir); -} - -/* * can_init_proc - create main CAN proc directory and procfs entries */ -void can_init_proc(void) +void can_init_proc(struct net *net) { /* create /proc/net/can directory */ - can_dir = proc_mkdir("can", init_net.proc_net); + net->can.proc_dir = proc_net_mkdir(net, "can", net->proc_net); - if (!can_dir) { - pr_info("can: failed to create /proc/net/can.\n"); + if (!net->can.proc_dir) { + printk(KERN_INFO "can: failed to create /proc/net/can . " + "CONFIG_PROC_FS missing?\n"); return; } /* own procfs entries from the AF_CAN core */ - pde_version = proc_create(CAN_PROC_VERSION, 0644, can_dir, - &can_version_proc_fops); - pde_stats = proc_create(CAN_PROC_STATS, 0644, can_dir, - &can_stats_proc_fops); - pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, can_dir, - &can_reset_stats_proc_fops); - pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_ERR); - pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_ALL); - pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_FIL); - pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_INV); - pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir, - &can_rcvlist_eff_proc_fops); - pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir, - &can_rcvlist_sff_proc_fops); + net->can.pde_version = proc_create(CAN_PROC_VERSION, 0644, + net->can.proc_dir, + &can_version_proc_fops); + net->can.pde_stats = proc_create(CAN_PROC_STATS, 0644, + net->can.proc_dir, + &can_stats_proc_fops); + net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, + net->can.proc_dir, + &can_reset_stats_proc_fops); + net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_ERR); + net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_ALL); + net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_FIL); + net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_INV); + net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, + net->can.proc_dir, + &can_rcvlist_eff_proc_fops); + net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, + net->can.proc_dir, + &can_rcvlist_sff_proc_fops); } /* * can_remove_proc - remove procfs entries and main CAN proc directory */ -void can_remove_proc(void) +void can_remove_proc(struct net *net) { - if (pde_version) - can_remove_proc_readentry(CAN_PROC_VERSION); + if (net->can.pde_version) + remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir); - if (pde_stats) - can_remove_proc_readentry(CAN_PROC_STATS); + if (net->can.pde_stats) + remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir); - if (pde_reset_stats) - can_remove_proc_readentry(CAN_PROC_RESET_STATS); + if (net->can.pde_reset_stats) + remove_proc_entry(CAN_PROC_RESET_STATS, net->can.proc_dir); - if (pde_rcvlist_err) - can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR); + if (net->can.pde_rcvlist_err) + remove_proc_entry(CAN_PROC_RCVLIST_ERR, net->can.proc_dir); - if (pde_rcvlist_all) - can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL); + if (net->can.pde_rcvlist_all) + remove_proc_entry(CAN_PROC_RCVLIST_ALL, net->can.proc_dir); - if (pde_rcvlist_fil) - can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL); + if (net->can.pde_rcvlist_fil) + remove_proc_entry(CAN_PROC_RCVLIST_FIL, net->can.proc_dir); - if (pde_rcvlist_inv) - can_remove_proc_readentry(CAN_PROC_RCVLIST_INV); + if (net->can.pde_rcvlist_inv) + remove_proc_entry(CAN_PROC_RCVLIST_INV, net->can.proc_dir); - if (pde_rcvlist_eff) - can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF); + if (net->can.pde_rcvlist_eff) + remove_proc_entry(CAN_PROC_RCVLIST_EFF, net->can.proc_dir); - if (pde_rcvlist_sff) - can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF); + if (net->can.pde_rcvlist_sff) + remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir); - if (can_dir) - remove_proc_entry("can", init_net.proc_net); + if (net->can.proc_dir) + remove_proc_entry("can", net->proc_net); } diff --git a/net/can/raw.c b/net/can/raw.c index 6dc546a06673..864c80dbdb72 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -181,20 +181,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data) kfree_skb(skb); } -static int raw_enable_filters(struct net_device *dev, struct sock *sk, - struct can_filter *filter, int count) +static int raw_enable_filters(struct net *net, struct net_device *dev, + struct sock *sk, struct can_filter *filter, + int count) { int err = 0; int i; for (i = 0; i < count; i++) { - err = can_rx_register(dev, filter[i].can_id, + err = can_rx_register(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) - can_rx_unregister(dev, filter[i].can_id, + can_rx_unregister(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk); break; @@ -204,57 +205,62 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk, return err; } -static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, - can_err_mask_t err_mask) +static int raw_enable_errfilter(struct net *net, struct net_device *dev, + struct sock *sk, can_err_mask_t err_mask) { int err = 0; if (err_mask) - err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG, + err = can_rx_register(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk, "raw", sk); return err; } -static void raw_disable_filters(struct net_device *dev, struct sock *sk, - struct can_filter *filter, int count) +static void raw_disable_filters(struct net *net, struct net_device *dev, + struct sock *sk, struct can_filter *filter, + int count) { int i; for (i = 0; i < count; i++) - can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask, - raw_rcv, sk); + can_rx_unregister(net, dev, filter[i].can_id, + filter[i].can_mask, raw_rcv, sk); } -static inline void raw_disable_errfilter(struct net_device *dev, +static inline void raw_disable_errfilter(struct net *net, + struct net_device *dev, struct sock *sk, can_err_mask_t err_mask) { if (err_mask) - can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG, + can_rx_unregister(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk); } -static inline void raw_disable_allfilters(struct net_device *dev, +static inline void raw_disable_allfilters(struct net *net, + struct net_device *dev, struct sock *sk) { struct raw_sock *ro = raw_sk(sk); - raw_disable_filters(dev, sk, ro->filter, ro->count); - raw_disable_errfilter(dev, sk, ro->err_mask); + raw_disable_filters(net, dev, sk, ro->filter, ro->count); + raw_disable_errfilter(net, dev, sk, ro->err_mask); } -static int raw_enable_allfilters(struct net_device *dev, struct sock *sk) +static int raw_enable_allfilters(struct net *net, struct net_device *dev, + struct sock *sk) { struct raw_sock *ro = raw_sk(sk); int err; - err = raw_enable_filters(dev, sk, ro->filter, ro->count); + err = raw_enable_filters(net, dev, sk, ro->filter, ro->count); if (!err) { - err = raw_enable_errfilter(dev, sk, ro->err_mask); + err = raw_enable_errfilter(net, dev, sk, ro->err_mask); if (err) - raw_disable_filters(dev, sk, ro->filter, ro->count); + raw_disable_filters(net, dev, sk, ro->filter, + ro->count); } return err; @@ -267,7 +273,7 @@ static int raw_notifier(struct notifier_block *nb, struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), sock_net(sk))) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) @@ -282,7 +288,7 @@ static int raw_notifier(struct notifier_block *nb, lock_sock(sk); /* remove current filters & unregister */ if (ro->bound) - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), dev, sk); if (ro->count > 1) kfree(ro->filter); @@ -358,13 +364,13 @@ static int raw_release(struct socket *sock) if (ro->ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); if (dev) { - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), dev, sk); dev_put(dev); } } else - raw_disable_allfilters(NULL, sk); + raw_disable_allfilters(sock_net(sk), NULL, sk); } if (ro->count > 1) @@ -404,7 +410,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (addr->can_ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, addr->can_ifindex); + dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); if (!dev) { err = -ENODEV; goto out; @@ -420,13 +426,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; /* filters set by default/setsockopt */ - err = raw_enable_allfilters(dev, sk); + err = raw_enable_allfilters(sock_net(sk), dev, sk); dev_put(dev); } else { ifindex = 0; /* filters set by default/setsockopt */ - err = raw_enable_allfilters(NULL, sk); + err = raw_enable_allfilters(sock_net(sk), NULL, sk); } if (!err) { @@ -435,13 +441,15 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (ro->ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), + ro->ifindex); if (dev) { - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), + dev, sk); dev_put(dev); } } else - raw_disable_allfilters(NULL, sk); + raw_disable_allfilters(sock_net(sk), NULL, sk); } ro->ifindex = ifindex; ro->bound = 1; @@ -517,15 +525,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (ro->bound && ro->ifindex) - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); if (ro->bound) { /* (try to) register the new filters */ if (count == 1) - err = raw_enable_filters(dev, sk, &sfilter, 1); + err = raw_enable_filters(sock_net(sk), dev, sk, + &sfilter, 1); else - err = raw_enable_filters(dev, sk, filter, - count); + err = raw_enable_filters(sock_net(sk), dev, sk, + filter, count); if (err) { if (count > 1) kfree(filter); @@ -533,7 +542,8 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, } /* remove old filter registrations */ - raw_disable_filters(dev, sk, ro->filter, ro->count); + raw_disable_filters(sock_net(sk), dev, sk, ro->filter, + ro->count); } /* remove old filter space */ @@ -569,18 +579,20 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (ro->bound && ro->ifindex) - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); /* remove current error mask */ if (ro->bound) { /* (try to) register the new err_mask */ - err = raw_enable_errfilter(dev, sk, err_mask); + err = raw_enable_errfilter(sock_net(sk), dev, sk, + err_mask); if (err) goto out_err; /* remove old err_mask registration */ - raw_disable_errfilter(dev, sk, ro->err_mask); + raw_disable_errfilter(sock_net(sk), dev, sk, + ro->err_mask); } /* link new err_mask to the socket */ @@ -741,7 +753,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return -EINVAL; } - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENXIO; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 38dcf1eb427d..f76bb3332613 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -7,6 +7,7 @@ #include <linux/kthread.h> #include <linux/net.h> #include <linux/nsproxy.h> +#include <linux/sched/mm.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> @@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) { struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct socket *sock; + unsigned int noio_flag; int ret; BUG_ON(con->sock); + + /* sock_create_kern() allocates with GFP_KERNEL */ + noio_flag = memalloc_noio_save(); ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); + memalloc_noio_restore(noio_flag); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; diff --git a/net/core/datagram.c b/net/core/datagram.c index ea633342ab0d..15ef99469cfe 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); - } while (sk_can_busy_loop(sk) && - sk_busy_loop(sk, flags & MSG_DONTWAIT)); + + if (!sk_can_busy_loop(sk)) + break; + + sk_busy_loop(sk, flags & MSG_DONTWAIT); + } while (!skb_queue_empty(&sk->sk_receive_queue)); error = -EAGAIN; @@ -398,7 +402,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len) { int start = skb_headlen(skb); - int i, copy = start - offset; + int i, copy = start - offset, start_off = offset, n; struct sk_buff *frag_iter; trace_skb_copy_datagram_iovec(skb, len); @@ -407,11 +411,12 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - if (copy_to_iter(skb->data + offset, copy, to) != copy) + n = copy_to_iter(skb->data + offset, copy, to); + offset += n; + if (n != copy) goto short_copy; if ((len -= copy) == 0) return 0; - offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ @@ -425,13 +430,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - if (copy_page_to_iter(skb_frag_page(frag), + n = copy_page_to_iter(skb_frag_page(frag), frag->page_offset + offset - - start, copy, to) != copy) + start, copy, to); + offset += n; + if (n != copy) goto short_copy; if (!(len -= copy)) return 0; - offset += copy; } start = end; } @@ -463,6 +469,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, */ fault: + iov_iter_revert(to, offset - start_off); return -EFAULT; short_copy: @@ -613,7 +620,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, __wsum *csump) { int start = skb_headlen(skb); - int i, copy = start - offset; + int i, copy = start - offset, start_off = offset; struct sk_buff *frag_iter; int pos = 0; int n; @@ -623,11 +630,11 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, if (copy > len) copy = len; n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to); + offset += n; if (n != copy) goto fault; if ((len -= copy) == 0) return 0; - offset += copy; pos = copy; } @@ -649,12 +656,12 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, offset - start, copy, &csum2, to); kunmap(page); + offset += n; if (n != copy) goto fault; *csump = csum_block_add(*csump, csum2, pos); if (!(len -= copy)) return 0; - offset += copy; pos += copy; } start = end; @@ -687,6 +694,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, return 0; fault: + iov_iter_revert(to, offset - start_off); return -EFAULT; } @@ -771,6 +779,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, } return 0; csum_error: + iov_iter_revert(&msg->msg_iter, chunk); return -EINVAL; fault: return -EFAULT; diff --git a/net/core/dev.c b/net/core/dev.c index 7869ae3837ca..5d33e2baab2b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5060,27 +5060,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) do_softirq(); } -bool sk_busy_loop(struct sock *sk, int nonblock) +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) { - unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; + unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); void *have_poll_lock = NULL; struct napi_struct *napi; - int rc; restart: - rc = false; napi_poll = NULL; rcu_read_lock(); - napi = napi_by_id(sk->sk_napi_id); + napi = napi_by_id(napi_id); if (!napi) goto out; preempt_disable(); for (;;) { - rc = 0; + int work = 0; + local_bh_disable(); if (!napi_poll) { unsigned long val = READ_ONCE(napi->state); @@ -5098,16 +5099,15 @@ restart: have_poll_lock = netpoll_poll_lock(napi); napi_poll = napi->poll; } - rc = napi_poll(napi, BUSY_POLL_BUDGET); - trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); + work = napi_poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi, work, BUSY_POLL_BUDGET); count: - if (rc > 0) - __NET_ADD_STATS(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); + if (work > 0) + __NET_ADD_STATS(dev_net(napi->dev), + LINUX_MIB_BUSYPOLLRXPACKETS, work); local_bh_enable(); - if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || - busy_loop_timeout(end_time)) + if (!loop_end || loop_end(loop_end_arg, start_time)) break; if (unlikely(need_resched())) { @@ -5116,9 +5116,8 @@ count: preempt_enable(); rcu_read_unlock(); cond_resched(); - rc = !skb_queue_empty(&sk->sk_receive_queue); - if (rc || busy_loop_timeout(end_time)) - return rc; + if (loop_end(loop_end_arg, start_time)) + return; goto restart; } cpu_relax(); @@ -5126,12 +5125,10 @@ count: if (napi_poll) busy_poll_stop(napi, have_poll_lock); preempt_enable(); - rc = !skb_queue_empty(&sk->sk_receive_queue); out: rcu_read_unlock(); - return rc; } -EXPORT_SYMBOL(sk_busy_loop); +EXPORT_SYMBOL(napi_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ @@ -5143,10 +5140,10 @@ static void napi_hash_add(struct napi_struct *napi) spin_lock(&napi_hash_lock); - /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ + /* 0..NR_CPUS range is reserved for sender_cpu use */ do { - if (unlikely(++napi_gen_id < NR_CPUS + 1)) - napi_gen_id = NR_CPUS + 1; + if (unlikely(++napi_gen_id < MIN_NAPI_ID)) + napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); napi->napi_id = napi_gen_id; @@ -6757,7 +6754,6 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) return err; } -EXPORT_SYMBOL(dev_change_xdp_fd); /** * dev_new_index - allocate an ifindex diff --git a/net/core/devlink.c b/net/core/devlink.c index e9c1e6acfb6d..0afac5800b57 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1493,8 +1493,686 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (err) return err; } + return 0; +} + +int devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match) +{ + struct devlink_dpipe_header *header = match->header; + struct devlink_dpipe_field *field = &header->fields[match->field_id]; + struct nlattr *match_attr; + + match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH); + if (!match_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + nla_nest_end(skb, match_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, match_attr); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_match_put); + +static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table, + struct sk_buff *skb) +{ + struct nlattr *matches_attr; + + matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES); + if (!matches_attr) + return -EMSGSIZE; + + if (table->table_ops->matches_dump(table->priv, skb)) + goto nla_put_failure; + + nla_nest_end(skb, matches_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, matches_attr); + return -EMSGSIZE; +} + +int devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action) +{ + struct devlink_dpipe_header *header = action->header; + struct devlink_dpipe_field *field = &header->fields[action->field_id]; + struct nlattr *action_attr; + + action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION); + if (!action_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + nla_nest_end(skb, action_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, action_attr); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_action_put); + +static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table, + struct sk_buff *skb) +{ + struct nlattr *actions_attr; + + actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS); + if (!actions_attr) + return -EMSGSIZE; + + if (table->table_ops->actions_dump(table->priv, skb)) + goto nla_put_failure; + + nla_nest_end(skb, actions_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, actions_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_table_put(struct sk_buff *skb, + struct devlink_dpipe_table *table) +{ + struct nlattr *table_attr; + + table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE); + if (!table_attr) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size, + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, + table->counters_enabled)) + goto nla_put_failure; + + if (devlink_dpipe_matches_put(table, skb)) + goto nla_put_failure; + + if (devlink_dpipe_actions_put(table, skb)) + goto nla_put_failure; + + nla_nest_end(skb, table_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, table_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb, + struct genl_info *info) +{ + int err; + + if (*pskb) { + err = genlmsg_reply(*pskb, info); + if (err) + return err; + } + *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!*pskb) + return -ENOMEM; + return 0; +} + +static int devlink_dpipe_tables_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct list_head *dpipe_tables, + const char *table_name) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_dpipe_table *table; + struct nlattr *tables_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + bool incomplete; + void *hdr; + int i; + int err; + + table = list_first_entry(dpipe_tables, + struct devlink_dpipe_table, list); +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES); + if (!tables_attr) + goto nla_put_failure; + + i = 0; + incomplete = false; + list_for_each_entry_from(table, dpipe_tables, list) { + if (!table_name) { + err = devlink_dpipe_table_put(skb, table); + if (err) { + if (!i) + goto err_table_put; + incomplete = true; + break; + } + } else { + if (!strcmp(table->name, table_name)) { + err = devlink_dpipe_table_put(skb, table); + if (err) + break; + } + } + i++; + } + + nla_nest_end(skb, tables_attr); + genlmsg_end(skb, hdr); + if (incomplete) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_table_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *table_name = NULL; + + if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + + return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0, + &devlink->dpipe_table_list, + table_name); +} + +static int devlink_dpipe_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE, + value->value_size, value->value)) + return -EMSGSIZE; + if (value->mask) + if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK, + value->value_size, value->mask)) + return -EMSGSIZE; + if (value->mapping_valid) + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING, + value->mapping_value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_action_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (!value->action) + return -EINVAL; + if (devlink_dpipe_action_put(skb, value->action)) + return -EMSGSIZE; + if (devlink_dpipe_value_put(skb, value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_action_values_put(struct sk_buff *skb, + struct devlink_dpipe_value *values, + unsigned int values_count) +{ + struct nlattr *action_attr; + int i; + int err; + + for (i = 0; i < values_count; i++) { + action_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ACTION_VALUE); + if (!action_attr) + return -EMSGSIZE; + err = devlink_dpipe_action_value_put(skb, &values[i]); + if (err) + goto err_action_value_put; + nla_nest_end(skb, action_attr); + } + return 0; + +err_action_value_put: + nla_nest_cancel(skb, action_attr); + return err; +} + +static int devlink_dpipe_match_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (!value->match) + return -EINVAL; + if (devlink_dpipe_match_put(skb, value->match)) + return -EMSGSIZE; + if (devlink_dpipe_value_put(skb, value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_match_values_put(struct sk_buff *skb, + struct devlink_dpipe_value *values, + unsigned int values_count) +{ + struct nlattr *match_attr; + int i; + int err; + + for (i = 0; i < values_count; i++) { + match_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_MATCH_VALUE); + if (!match_attr) + return -EMSGSIZE; + err = devlink_dpipe_match_value_put(skb, &values[i]); + if (err) + goto err_match_value_put; + nla_nest_end(skb, match_attr); + } + return 0; + +err_match_value_put: + nla_nest_cancel(skb, match_attr); + return err; +} + +static int devlink_dpipe_entry_put(struct sk_buff *skb, + struct devlink_dpipe_entry *entry) +{ + struct nlattr *entry_attr, *matches_attr, *actions_attr; + int err; + + entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY); + if (!entry_attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index, + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (entry->counter_valid) + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, + entry->counter, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + matches_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES); + if (!matches_attr) + goto nla_put_failure; + + err = devlink_dpipe_match_values_put(skb, entry->match_values, + entry->match_values_count); + if (err) { + nla_nest_cancel(skb, matches_attr); + goto err_match_values_put; + } + nla_nest_end(skb, matches_attr); + + actions_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES); + if (!actions_attr) + goto nla_put_failure; + + err = devlink_dpipe_action_values_put(skb, entry->action_values, + entry->action_values_count); + if (err) { + nla_nest_cancel(skb, actions_attr); + goto err_action_values_put; + } + nla_nest_end(skb, actions_attr); + nla_nest_end(skb, entry_attr); return 0; + +nla_put_failure: + err = -EMSGSIZE; +err_match_values_put: +err_action_values_put: + nla_nest_cancel(skb, entry_attr); + return err; +} + +static struct devlink_dpipe_table * +devlink_dpipe_table_find(struct list_head *dpipe_tables, + const char *table_name) +{ + struct devlink_dpipe_table *table; + + list_for_each_entry_rcu(table, dpipe_tables, list) { + if (!strcmp(table->name, table_name)) + return table; + } + return NULL; +} + +int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink *devlink; + int err; + + err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb, + dump_ctx->info); + if (err) + return err; + + dump_ctx->hdr = genlmsg_put(dump_ctx->skb, + dump_ctx->info->snd_portid, + dump_ctx->info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, + dump_ctx->cmd); + if (!dump_ctx->hdr) + goto nla_put_failure; + + devlink = dump_ctx->info->user_ptr[0]; + if (devlink_nl_put_handle(dump_ctx->skb, devlink)) + goto nla_put_failure; + dump_ctx->nest = nla_nest_start(dump_ctx->skb, + DEVLINK_ATTR_DPIPE_ENTRIES); + if (!dump_ctx->nest) + goto nla_put_failure; + return 0; + +nla_put_failure: + genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr); + nlmsg_free(dump_ctx->skb); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare); + +int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry) +{ + return devlink_dpipe_entry_put(dump_ctx->skb, entry); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append); + +int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + nla_nest_end(dump_ctx->skb, dump_ctx->nest); + genlmsg_end(dump_ctx->skb, dump_ctx->hdr); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close); + +static int devlink_dpipe_entries_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_dpipe_table *table) +{ + struct devlink_dpipe_dump_ctx dump_ctx; + struct nlmsghdr *nlh; + int err; + + dump_ctx.skb = NULL; + dump_ctx.cmd = cmd; + dump_ctx.info = info; + + err = table->table_ops->entries_dump(table->priv, + table->counters_enabled, + &dump_ctx); + if (err) + goto err_entries_dump; + +send_done: + nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(dump_ctx.skb, info); + +err_entries_dump: +err_skb_send_alloc: + genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr); + nlmsg_free(dump_ctx.skb); + return err; +} + +static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_dpipe_table *table; + const char *table_name; + + if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + return -EINVAL; + + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + return -EINVAL; + + if (!table->table_ops->entries_dump) + return -EINVAL; + + return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET, + 0, table); +} + +static int devlink_dpipe_fields_put(struct sk_buff *skb, + const struct devlink_dpipe_header *header) +{ + struct devlink_dpipe_field *field; + struct nlattr *field_attr; + int i; + + for (i = 0; i < header->fields_count; i++) { + field = &header->fields[i]; + field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD); + if (!field_attr) + return -EMSGSIZE; + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type)) + goto nla_put_failure; + nla_nest_end(skb, field_attr); + } + return 0; + +nla_put_failure: + nla_nest_cancel(skb, field_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_header_put(struct sk_buff *skb, + struct devlink_dpipe_header *header) +{ + struct nlattr *fields_attr, *header_attr; + int err; + + header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER); + if (!header_attr) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS); + if (!fields_attr) + goto nla_put_failure; + + err = devlink_dpipe_fields_put(skb, header); + if (err) { + nla_nest_cancel(skb, fields_attr); + goto nla_put_failure; + } + nla_nest_end(skb, fields_attr); + nla_nest_end(skb, header_attr); + return 0; + +nla_put_failure: + err = -EMSGSIZE; + nla_nest_cancel(skb, header_attr); + return err; +} + +static int devlink_dpipe_headers_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_dpipe_headers * + dpipe_headers) +{ + struct devlink *devlink = info->user_ptr[0]; + struct nlattr *headers_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + void *hdr; + int i, j; + int err; + + i = 0; +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS); + if (!headers_attr) + goto nla_put_failure; + + j = 0; + for (; i < dpipe_headers->headers_count; i++) { + err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]); + if (err) { + if (!j) + goto err_table_put; + break; + } + j++; + } + nla_nest_end(skb, headers_attr); + genlmsg_end(skb, hdr); + if (i != dpipe_headers->headers_count) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_table_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + + if (!devlink->dpipe_headers) + return -EOPNOTSUPP; + return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET, + 0, devlink->dpipe_headers); +} + +static int devlink_dpipe_table_counters_set(struct devlink *devlink, + const char *table_name, + bool enable) +{ + struct devlink_dpipe_table *table; + + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + return -EINVAL; + + if (table->counter_control_extern) + return -EOPNOTSUPP; + + if (!(table->counters_enabled ^ enable)) + return 0; + + table->counters_enabled = enable; + if (table->table_ops->counters_set_update) + table->table_ops->counters_set_update(table->priv, enable); + return 0; +} + +static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *table_name; + bool counters_enable; + + if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] || + !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]) + return -EINVAL; + + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]); + + return devlink_dpipe_table_counters_set(devlink, table_name, + counters_enable); } static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { @@ -1512,6 +2190,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -1644,6 +2324,34 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, + .doit = devlink_nl_cmd_dpipe_table_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, + .doit = devlink_nl_cmd_dpipe_entries_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, + .doit = devlink_nl_cmd_dpipe_headers_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + .doit = devlink_nl_cmd_dpipe_table_counters_set, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -1680,6 +2388,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->sb_list); + INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -1880,6 +2589,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) } EXPORT_SYMBOL_GPL(devlink_sb_unregister); +/** + * devlink_dpipe_headers_register - register dpipe headers + * + * @devlink: devlink + * @dpipe_headers: dpipe header array + * + * Register the headers supported by hardware. + */ +int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers) +{ + mutex_lock(&devlink_mutex); + devlink->dpipe_headers = dpipe_headers; + mutex_unlock(&devlink_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); + +/** + * devlink_dpipe_headers_unregister - unregister dpipe headers + * + * @devlink: devlink + * + * Unregister the headers supported by hardware. + */ +void devlink_dpipe_headers_unregister(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + devlink->dpipe_headers = NULL; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); + +/** + * devlink_dpipe_table_counter_enabled - check if counter allocation + * required + * @devlink: devlink + * @table_name: tables name + * + * Used by driver to check if counter allocation is required. + * After counter allocation is turned on the table entries + * are updated to include counter statistics. + * + * After that point on the driver must respect the counter + * state so that each entry added to the table is added + * with a counter. + */ +bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name) +{ + struct devlink_dpipe_table *table; + bool enabled; + + rcu_read_lock(); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + enabled = false; + if (table) + enabled = table->counters_enabled; + rcu_read_unlock(); + return enabled; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); + +/** + * devlink_dpipe_table_register - register dpipe table + * + * @devlink: devlink + * @table_name: table name + * @table_ops: table ops + * @priv: priv + * @size: size + * @counter_control_extern: external control for counters + */ +int devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern) +{ + struct devlink_dpipe_table *table; + + if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) + return -EEXIST; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->name = table_name; + table->table_ops = table_ops; + table->priv = priv; + table->size = size; + table->counter_control_extern = counter_control_extern; + + mutex_lock(&devlink_mutex); + list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); + mutex_unlock(&devlink_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); + +/** + * devlink_dpipe_table_unregister - unregister dpipe table + * + * @devlink: devlink + * @table_name: table name + */ +void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) +{ + struct devlink_dpipe_table *table; + + mutex_lock(&devlink_mutex); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + goto unlock; + list_del_rcu(&table->list); + mutex_unlock(&devlink_mutex); + kfree_rcu(table, rcu); + return; +unlock: + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); + static int __init devlink_module_init(void) { return genl_register_family(&devlink_nl_family); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index fb55327dcfea..70ccda233bd1 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -412,9 +412,8 @@ static int __init init_net_drop_monitor(void) for_each_possible_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); INIT_WORK(&data->dm_alert_work, send_dm_alert); - init_timer(&data->send_timer); - data->send_timer.data = (unsigned long)data; - data->send_timer.function = sched_send_work; + setup_timer(&data->send_timer, sched_send_work, + (unsigned long)data); spin_lock_init(&data->lock); reset_per_cpu_data(data); } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index aecb2c7241b6..905a88ad28e0 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -109,6 +109,7 @@ static const char rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { [ETH_RSS_HASH_TOP_BIT] = "toeplitz", [ETH_RSS_HASH_XOR_BIT] = "xor", + [ETH_RSS_HASH_CRC32_BIT] = "crc32", }; static const char diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index b6791d94841d..df03110ca3c8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(~0), }; +bool fib_rule_matchall(const struct fib_rule *rule) +{ + if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || + rule->flags) + return false; + if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) + return false; + if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) || + !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end)) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib_rule_matchall); + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -372,7 +386,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, NULL); if (err < 0) goto errout; @@ -566,7 +580,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, NULL); if (err < 0) goto errout; diff --git a/net/core/filter.c b/net/core/filter.c index ebaeaf2e46e8..ce2a19da8aa4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -26,6 +26,7 @@ #include <linux/mm.h> #include <linux/fcntl.h> #include <linux/socket.h> +#include <linux/sock_diag.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -91,8 +92,13 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); + struct sock *save_sk = skb->sk; + unsigned int pkt_len; + + skb->sk = sk; + pkt_len = bpf_prog_run_save_cb(filter->prog, skb); err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; + skb->sk = save_sk; } rcu_read_unlock(); @@ -928,7 +934,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu) */ static void sk_filter_release(struct sk_filter *fp) { - if (atomic_dec_and_test(&fp->refcnt)) + if (refcount_dec_and_test(&fp->refcnt)) call_rcu(&fp->rcu, sk_filter_release_rcu); } @@ -943,20 +949,27 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) /* try to charge the socket memory if there is space available * return true on success */ -bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) +static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) { u32 filter_size = bpf_prog_size(fp->prog->len); /* same check as in sock_kmalloc() */ if (filter_size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { - atomic_inc(&fp->refcnt); atomic_add(filter_size, &sk->sk_omem_alloc); return true; } return false; } +bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) +{ + bool ret = __sk_filter_charge(sk, fp); + if (ret) + refcount_inc(&fp->refcnt); + return ret; +} + static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) { struct sock_filter *old_prog; @@ -1179,12 +1192,12 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return -ENOMEM; fp->prog = prog; - atomic_set(&fp->refcnt, 0); - if (!sk_filter_charge(sk, fp)) { + if (!__sk_filter_charge(sk, fp)) { kfree(fp); return -ENOMEM; } + refcount_set(&fp->refcnt, 1); old_fp = rcu_dereference_protected(sk->sk_filter, lockdep_sock_is_held(sk)); @@ -2599,6 +2612,36 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) +{ + return skb->sk ? sock_gen_cookie(skb->sk) : 0; +} + +static const struct bpf_func_proto bpf_get_socket_cookie_proto = { + .func = bpf_get_socket_cookie, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) +{ + struct sock *sk = sk_to_full_sk(skb->sk); + kuid_t kuid; + + if (!sk || !sk_fullsock(sk)) + return overflowuid; + kuid = sock_net_uid(sock_net(sk), sk); + return from_kuid_munged(sock_net(sk)->user_ns, kuid); +} + +static const struct bpf_func_proto bpf_get_socket_uid_proto = { + .func = bpf_get_socket_uid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2633,6 +2676,10 @@ sk_filter_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_socket_uid: + return &bpf_get_socket_uid_proto; default: return bpf_base_func_proto(func_id); } @@ -2692,6 +2739,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_socket_uid: + return &bpf_get_socket_uid_proto; default: return bpf_base_func_proto(func_id); } @@ -3252,111 +3303,55 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -static const struct bpf_verifier_ops sk_filter_ops = { +const struct bpf_verifier_ops sk_filter_prog_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; -static const struct bpf_verifier_ops tc_cls_act_ops = { +const struct bpf_verifier_ops tc_cls_act_prog_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, + .test_run = bpf_prog_test_run_skb, }; -static const struct bpf_verifier_ops xdp_ops = { +const struct bpf_verifier_ops xdp_prog_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, + .test_run = bpf_prog_test_run_xdp, }; -static const struct bpf_verifier_ops cg_skb_ops = { +const struct bpf_verifier_ops cg_skb_prog_ops = { .get_func_proto = cg_skb_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, + .test_run = bpf_prog_test_run_skb, }; -static const struct bpf_verifier_ops lwt_inout_ops = { +const struct bpf_verifier_ops lwt_inout_prog_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, + .test_run = bpf_prog_test_run_skb, }; -static const struct bpf_verifier_ops lwt_xmit_ops = { +const struct bpf_verifier_ops lwt_xmit_prog_ops = { .get_func_proto = lwt_xmit_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, + .test_run = bpf_prog_test_run_skb, }; -static const struct bpf_verifier_ops cg_sock_ops = { +const struct bpf_verifier_ops cg_sock_prog_ops = { .get_func_proto = bpf_base_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; -static struct bpf_prog_type_list sk_filter_type __ro_after_init = { - .ops = &sk_filter_ops, - .type = BPF_PROG_TYPE_SOCKET_FILTER, -}; - -static struct bpf_prog_type_list sched_cls_type __ro_after_init = { - .ops = &tc_cls_act_ops, - .type = BPF_PROG_TYPE_SCHED_CLS, -}; - -static struct bpf_prog_type_list sched_act_type __ro_after_init = { - .ops = &tc_cls_act_ops, - .type = BPF_PROG_TYPE_SCHED_ACT, -}; - -static struct bpf_prog_type_list xdp_type __ro_after_init = { - .ops = &xdp_ops, - .type = BPF_PROG_TYPE_XDP, -}; - -static struct bpf_prog_type_list cg_skb_type __ro_after_init = { - .ops = &cg_skb_ops, - .type = BPF_PROG_TYPE_CGROUP_SKB, -}; - -static struct bpf_prog_type_list lwt_in_type __ro_after_init = { - .ops = &lwt_inout_ops, - .type = BPF_PROG_TYPE_LWT_IN, -}; - -static struct bpf_prog_type_list lwt_out_type __ro_after_init = { - .ops = &lwt_inout_ops, - .type = BPF_PROG_TYPE_LWT_OUT, -}; - -static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = { - .ops = &lwt_xmit_ops, - .type = BPF_PROG_TYPE_LWT_XMIT, -}; - -static struct bpf_prog_type_list cg_sock_type __ro_after_init = { - .ops = &cg_sock_ops, - .type = BPF_PROG_TYPE_CGROUP_SOCK -}; - -static int __init register_sk_filter_ops(void) -{ - bpf_register_prog_type(&sk_filter_type); - bpf_register_prog_type(&sched_cls_type); - bpf_register_prog_type(&sched_act_type); - bpf_register_prog_type(&xdp_type); - bpf_register_prog_type(&cg_skb_type); - bpf_register_prog_type(&cg_sock_type); - bpf_register_prog_type(&lwt_in_type); - bpf_register_prog_type(&lwt_out_type); - bpf_register_prog_type(&lwt_xmit_type); - - return 0; -} -late_initcall(register_sk_filter_ops); - int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/flow.c b/net/core/flow.c index f765c11d8df5..f7f5d1932a27 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -47,7 +47,7 @@ struct flow_flush_info { static struct kmem_cache *flow_cachep __read_mostly; -#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) +#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift) #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) static void flow_cache_new_hashrnd(unsigned long arg) @@ -99,7 +99,8 @@ static void flow_cache_gc_task(struct work_struct *work) } static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - int deleted, struct list_head *gc_list, + unsigned int deleted, + struct list_head *gc_list, struct netns_xfrm *xfrm) { if (deleted) { @@ -114,17 +115,18 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, static void __flow_cache_shrink(struct flow_cache *fc, struct flow_cache_percpu *fcp, - int shrink_to) + unsigned int shrink_to) { struct flow_cache_entry *fle; struct hlist_node *tmp; LIST_HEAD(gc_list); - int i, deleted = 0; + unsigned int deleted = 0; struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, flow_cache_global); + unsigned int i; for (i = 0; i < flow_cache_hash_size(fc); i++) { - int saved = 0; + unsigned int saved = 0; hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { @@ -145,7 +147,7 @@ static void __flow_cache_shrink(struct flow_cache *fc, static void flow_cache_shrink(struct flow_cache *fc, struct flow_cache_percpu *fcp) { - int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); + unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); __flow_cache_shrink(fc, fcp, shrink_to); } @@ -161,7 +163,7 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, const struct flowi *key, - size_t keysize) + unsigned int keysize) { const u32 *k = (const u32 *) key; const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); @@ -174,7 +176,7 @@ static u32 flow_hash_code(struct flow_cache *fc, * important assumptions that we can here, such as alignment. */ static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, - size_t keysize) + unsigned int keysize) { const flow_compare_t *k1, *k1_lim, *k2; @@ -199,7 +201,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; struct flow_cache_object *flo; - size_t keysize; + unsigned int keysize; unsigned int hash; local_bh_disable(); @@ -295,9 +297,10 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache_entry *fle; struct hlist_node *tmp; LIST_HEAD(gc_list); - int i, deleted = 0; + unsigned int deleted = 0; struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, flow_cache_global); + unsigned int i; fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { @@ -327,7 +330,7 @@ static void flow_cache_flush_tasklet(unsigned long data) static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp; - int i; + unsigned int i; fcp = per_cpu_ptr(fc->percpu, cpu); for (i = 0; i < flow_cache_hash_size(fc); i++) @@ -402,12 +405,12 @@ void flow_cache_flush_deferred(struct net *net) static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); + unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); if (!fcp->hash_table) { fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); if (!fcp->hash_table) { - pr_err("NET: failed to allocate flow cache sz %zu\n", sz); + pr_err("NET: failed to allocate flow cache sz %u\n", sz); return -ENOMEM; } fcp->hash_rnd_recalc = 1; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c35aae13c8d2..c9cf425303f8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -113,6 +113,216 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, } EXPORT_SYMBOL(__skb_flow_get_ports); +enum flow_dissect_ret { + FLOW_DISSECT_RET_OUT_GOOD, + FLOW_DISSECT_RET_OUT_BAD, + FLOW_DISSECT_RET_OUT_PROTO_AGAIN, +}; + +static enum flow_dissect_ret +__skb_flow_dissect_mpls(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, int nhoff, int hlen) +{ + struct flow_dissector_key_keyid *key_keyid; + struct mpls_label *hdr, _hdr[2]; + + if (!dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) + return FLOW_DISSECT_RET_OUT_GOOD; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, + hlen, &_hdr); + if (!hdr) + return FLOW_DISSECT_RET_OUT_BAD; + + if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >> + MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) { + key_keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_MPLS_ENTROPY, + target_container); + key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK); + } + return FLOW_DISSECT_RET_OUT_GOOD; +} + +static enum flow_dissect_ret +__skb_flow_dissect_arp(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, int nhoff, int hlen) +{ + struct flow_dissector_key_arp *key_arp; + struct { + unsigned char ar_sha[ETH_ALEN]; + unsigned char ar_sip[4]; + unsigned char ar_tha[ETH_ALEN]; + unsigned char ar_tip[4]; + } *arp_eth, _arp_eth; + const struct arphdr *arp; + struct arphdr _arp; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP)) + return FLOW_DISSECT_RET_OUT_GOOD; + + arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, + hlen, &_arp); + if (!arp) + return FLOW_DISSECT_RET_OUT_BAD; + + if (arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_hln != ETH_ALEN || + arp->ar_pln != 4 || + (arp->ar_op != htons(ARPOP_REPLY) && + arp->ar_op != htons(ARPOP_REQUEST))) + return FLOW_DISSECT_RET_OUT_BAD; + + arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), + sizeof(_arp_eth), data, + hlen, &_arp_eth); + if (!arp_eth) + return FLOW_DISSECT_RET_OUT_BAD; + + key_arp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ARP, + target_container); + + memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip)); + memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip)); + + /* Only store the lower byte of the opcode; + * this covers ARPOP_REPLY and ARPOP_REQUEST. + */ + key_arp->op = ntohs(arp->ar_op) & 0xff; + + ether_addr_copy(key_arp->sha, arp_eth->ar_sha); + ether_addr_copy(key_arp->tha, arp_eth->ar_tha); + + return FLOW_DISSECT_RET_OUT_GOOD; +} + +static enum flow_dissect_ret +__skb_flow_dissect_gre(const struct sk_buff *skb, + struct flow_dissector_key_control *key_control, + struct flow_dissector *flow_dissector, + void *target_container, void *data, + __be16 *p_proto, int *p_nhoff, int *p_hlen, + unsigned int flags) +{ + struct flow_dissector_key_keyid *key_keyid; + struct gre_base_hdr *hdr, _hdr; + int offset = 0; + u16 gre_ver; + + hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), + data, *p_hlen, &_hdr); + if (!hdr) + return FLOW_DISSECT_RET_OUT_BAD; + + /* Only look inside GRE without routing */ + if (hdr->flags & GRE_ROUTING) + return FLOW_DISSECT_RET_OUT_GOOD; + + /* Only look inside GRE for version 0 and 1 */ + gre_ver = ntohs(hdr->flags & GRE_VERSION); + if (gre_ver > 1) + return FLOW_DISSECT_RET_OUT_GOOD; + + *p_proto = hdr->protocol; + if (gre_ver) { + /* Version1 must be PPTP, and check the flags */ + if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) + return FLOW_DISSECT_RET_OUT_GOOD; + } + + offset += sizeof(struct gre_base_hdr); + + if (hdr->flags & GRE_CSUM) + offset += sizeof(((struct gre_full_hdr *) 0)->csum) + + sizeof(((struct gre_full_hdr *) 0)->reserved1); + + if (hdr->flags & GRE_KEY) { + const __be32 *keyid; + __be32 _keyid; + + keyid = __skb_header_pointer(skb, *p_nhoff + offset, + sizeof(_keyid), + data, *p_hlen, &_keyid); + if (!keyid) + return FLOW_DISSECT_RET_OUT_BAD; + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_GRE_KEYID)) { + key_keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_GRE_KEYID, + target_container); + if (gre_ver == 0) + key_keyid->keyid = *keyid; + else + key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; + } + offset += sizeof(((struct gre_full_hdr *) 0)->key); + } + + if (hdr->flags & GRE_SEQ) + offset += sizeof(((struct pptp_gre_header *) 0)->seq); + + if (gre_ver == 0) { + if (*p_proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = __skb_header_pointer(skb, *p_nhoff + offset, + sizeof(_eth), + data, *p_hlen, &_eth); + if (!eth) + return FLOW_DISSECT_RET_OUT_BAD; + *p_proto = eth->h_proto; + offset += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + *p_hlen = *p_nhoff + offset; + } + } else { /* version 1, must be PPTP */ + u8 _ppp_hdr[PPP_HDRLEN]; + u8 *ppp_hdr; + + if (hdr->flags & GRE_ACK) + offset += sizeof(((struct pptp_gre_header *) 0)->ack); + + ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset, + sizeof(_ppp_hdr), + data, *p_hlen, _ppp_hdr); + if (!ppp_hdr) + return FLOW_DISSECT_RET_OUT_BAD; + + switch (PPP_PROTOCOL(ppp_hdr)) { + case PPP_IP: + *p_proto = htons(ETH_P_IP); + break; + case PPP_IPV6: + *p_proto = htons(ETH_P_IPV6); + break; + default: + /* Could probably catch some more like MPLS */ + break; + } + + offset += PPP_HDRLEN; + } + + *p_nhoff += offset; + key_control->flags |= FLOW_DIS_ENCAPSULATION; + if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return FLOW_DISSECT_RET_OUT_GOOD; + + return FLOW_DISSECT_RET_OUT_PROTO_AGAIN; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified @@ -138,12 +348,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; - struct flow_dissector_key_arp *key_arp; struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; - struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; bool ret; @@ -181,7 +389,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); } -again: +proto_again: switch (proto) { case htons(ETH_P_IP): { const struct iphdr *iph; @@ -284,7 +492,7 @@ ipv6: proto = vlan->h_vlan_encapsulated_proto; nhoff += sizeof(*vlan); if (skip_vlan) - goto again; + goto proto_again; } skip_vlan = true; @@ -307,7 +515,7 @@ ipv6: } } - goto again; + goto proto_again; } case htons(ETH_P_PPP_SES): { struct { @@ -349,31 +557,17 @@ ipv6: } case htons(ETH_P_MPLS_UC): - case htons(ETH_P_MPLS_MC): { - struct mpls_label *hdr, _hdr[2]; + case htons(ETH_P_MPLS_MC): mpls: - hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, - hlen, &_hdr); - if (!hdr) - goto out_bad; - - if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >> - MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) { - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) { - key_keyid = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_MPLS_ENTROPY, - target_container); - key_keyid->keyid = hdr[1].entry & - htonl(MPLS_LS_LABEL_MASK); - } - + switch (__skb_flow_dissect_mpls(skb, flow_dissector, + target_container, data, + nhoff, hlen)) { + case FLOW_DISSECT_RET_OUT_GOOD: goto out_good; + case FLOW_DISSECT_RET_OUT_BAD: + default: + goto out_bad; } - - goto out_good; - } - case htons(ETH_P_FCOE): if ((hlen - nhoff) < FCOE_HEADER_LEN) goto out_bad; @@ -382,177 +576,33 @@ mpls: goto out_good; case htons(ETH_P_ARP): - case htons(ETH_P_RARP): { - struct { - unsigned char ar_sha[ETH_ALEN]; - unsigned char ar_sip[4]; - unsigned char ar_tha[ETH_ALEN]; - unsigned char ar_tip[4]; - } *arp_eth, _arp_eth; - const struct arphdr *arp; - struct arphdr *_arp; - - arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, - hlen, &_arp); - if (!arp) - goto out_bad; - - if (arp->ar_hrd != htons(ARPHRD_ETHER) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_hln != ETH_ALEN || - arp->ar_pln != 4 || - (arp->ar_op != htons(ARPOP_REPLY) && - arp->ar_op != htons(ARPOP_REQUEST))) - goto out_bad; - - arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), - sizeof(_arp_eth), data, - hlen, - &_arp_eth); - if (!arp_eth) + case htons(ETH_P_RARP): + switch (__skb_flow_dissect_arp(skb, flow_dissector, + target_container, data, + nhoff, hlen)) { + case FLOW_DISSECT_RET_OUT_GOOD: + goto out_good; + case FLOW_DISSECT_RET_OUT_BAD: + default: goto out_bad; - - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ARP)) { - - key_arp = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_ARP, - target_container); - - memcpy(&key_arp->sip, arp_eth->ar_sip, - sizeof(key_arp->sip)); - memcpy(&key_arp->tip, arp_eth->ar_tip, - sizeof(key_arp->tip)); - - /* Only store the lower byte of the opcode; - * this covers ARPOP_REPLY and ARPOP_REQUEST. - */ - key_arp->op = ntohs(arp->ar_op) & 0xff; - - ether_addr_copy(key_arp->sha, arp_eth->ar_sha); - ether_addr_copy(key_arp->tha, arp_eth->ar_tha); } - - goto out_good; - } - default: goto out_bad; } ip_proto_again: switch (ip_proto) { - case IPPROTO_GRE: { - struct gre_base_hdr *hdr, _hdr; - u16 gre_ver; - int offset = 0; - - hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); - if (!hdr) + case IPPROTO_GRE: + switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector, + target_container, data, + &proto, &nhoff, &hlen, flags)) { + case FLOW_DISSECT_RET_OUT_GOOD: + goto out_good; + case FLOW_DISSECT_RET_OUT_BAD: goto out_bad; - - /* Only look inside GRE without routing */ - if (hdr->flags & GRE_ROUTING) - break; - - /* Only look inside GRE for version 0 and 1 */ - gre_ver = ntohs(hdr->flags & GRE_VERSION); - if (gre_ver > 1) - break; - - proto = hdr->protocol; - if (gre_ver) { - /* Version1 must be PPTP, and check the flags */ - if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) - break; - } - - offset += sizeof(struct gre_base_hdr); - - if (hdr->flags & GRE_CSUM) - offset += sizeof(((struct gre_full_hdr *)0)->csum) + - sizeof(((struct gre_full_hdr *)0)->reserved1); - - if (hdr->flags & GRE_KEY) { - const __be32 *keyid; - __be32 _keyid; - - keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid), - data, hlen, &_keyid); - if (!keyid) - goto out_bad; - - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_GRE_KEYID)) { - key_keyid = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_GRE_KEYID, - target_container); - if (gre_ver == 0) - key_keyid->keyid = *keyid; - else - key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; - } - offset += sizeof(((struct gre_full_hdr *)0)->key); + case FLOW_DISSECT_RET_OUT_PROTO_AGAIN: + goto proto_again; } - - if (hdr->flags & GRE_SEQ) - offset += sizeof(((struct pptp_gre_header *)0)->seq); - - if (gre_ver == 0) { - if (proto == htons(ETH_P_TEB)) { - const struct ethhdr *eth; - struct ethhdr _eth; - - eth = __skb_header_pointer(skb, nhoff + offset, - sizeof(_eth), - data, hlen, &_eth); - if (!eth) - goto out_bad; - proto = eth->h_proto; - offset += sizeof(*eth); - - /* Cap headers that we access via pointers at the - * end of the Ethernet header as our maximum alignment - * at that point is only 2 bytes. - */ - if (NET_IP_ALIGN) - hlen = (nhoff + offset); - } - } else { /* version 1, must be PPTP */ - u8 _ppp_hdr[PPP_HDRLEN]; - u8 *ppp_hdr; - - if (hdr->flags & GRE_ACK) - offset += sizeof(((struct pptp_gre_header *)0)->ack); - - ppp_hdr = __skb_header_pointer(skb, nhoff + offset, - sizeof(_ppp_hdr), - data, hlen, _ppp_hdr); - if (!ppp_hdr) - goto out_bad; - - switch (PPP_PROTOCOL(ppp_hdr)) { - case PPP_IP: - proto = htons(ETH_P_IP); - break; - case PPP_IPV6: - proto = htons(ETH_P_IPV6); - break; - default: - /* Could probably catch some more like MPLS */ - break; - } - - offset += PPP_HDRLEN; - } - - nhoff += offset; - key_control->flags |= FLOW_DIS_ENCAPSULATION; - if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) - goto out_good; - - goto again; - } case NEXTHDR_HOP: case NEXTHDR_ROUTING: case NEXTHDR_DEST: { diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 0cfe7b0216c3..b3bc0a31af9f 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -209,7 +209,8 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, int ret; u32 fd; - ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy); + ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy, + NULL); if (ret < 0) return ret; @@ -249,7 +250,7 @@ static int bpf_build_state(struct nlattr *nla, if (family != AF_INET && family != AF_INET6) return -EAFNOSUPPORT; - ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy); + ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL); if (ret < 0) return ret; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 6df9f8fabf0c..b5888190223c 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -162,7 +162,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) struct rtnexthop *rtnh = (struct rtnexthop *)attr; struct nlattr *nla_entype; struct nlattr *attrs; - struct nlattr *nla; u16 encap_type; int attrlen; @@ -170,7 +169,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { attrs = rtnh_attrs(rtnh); - nla = nla_find(attrs, attrlen, RTA_ENCAP); nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla_entype) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e7c12caa20c8..31f37b264710 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -52,8 +52,9 @@ do { \ #define PNEIGH_HASHMASK 0xF static void neigh_timer_handler(unsigned long arg); -static void __neigh_notify(struct neighbour *n, int type, int flags); -static void neigh_update_notify(struct neighbour *neigh); +static void __neigh_notify(struct neighbour *n, int type, int flags, + u32 pid); +static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); #ifdef CONFIG_PROC_FS @@ -99,7 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) if (neigh->parms->neigh_cleanup) neigh->parms->neigh_cleanup(neigh); - __neigh_notify(neigh, RTM_DELNEIGH, 0); + __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); neigh_release(neigh); } @@ -860,7 +861,8 @@ static void neigh_probe(struct neighbour *neigh) if (skb) skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); + if (neigh->ops->solicit) + neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); kfree_skb(skb); } @@ -948,7 +950,7 @@ out: } if (notify) - neigh_update_notify(neigh); + neigh_update_notify(neigh, 0); neigh_release(neigh); } @@ -1072,7 +1074,7 @@ static void neigh_update_hhs(struct neighbour *neigh) */ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, - u32 flags) + u32 flags, u32 nlmsg_pid) { u8 old; int err; @@ -1229,7 +1231,7 @@ out: write_unlock_bh(&neigh->lock); if (notify) - neigh_update_notify(neigh); + neigh_update_notify(neigh, nlmsg_pid); return err; } @@ -1260,7 +1262,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, lladdr || !dev->addr_len); if (neigh) neigh_update(neigh, lladdr, NUD_STALE, - NEIGH_UPDATE_F_OVERRIDE); + NEIGH_UPDATE_F_OVERRIDE, 0); return neigh; } EXPORT_SYMBOL(neigh_event_ns); @@ -1638,7 +1640,8 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_ADMIN, + NETLINK_CB(skb).portid); neigh_release(neigh); out: @@ -1658,7 +1661,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) int err; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, NULL); if (err < 0) goto out; @@ -1729,7 +1732,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) neigh_event_send(neigh, NULL); err = 0; } else - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags, + NETLINK_CB(skb).portid); neigh_release(neigh); out: @@ -1942,7 +1946,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) int err, tidx; err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, - nl_neightbl_policy); + nl_neightbl_policy, NULL); if (err < 0) goto errout; @@ -1980,7 +1984,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) int i, ifindex = 0; err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], - nl_ntbl_parm_policy); + nl_ntbl_parm_policy, NULL); if (err < 0) goto errout_tbl_lock; @@ -2229,10 +2233,10 @@ nla_put_failure: return -EMSGSIZE; } -static void neigh_update_notify(struct neighbour *neigh) +static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid) { call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); - __neigh_notify(neigh, RTM_NEWNEIGH, 0); + __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid); } static bool neigh_master_filtered(struct net_device *dev, int master_idx) @@ -2271,7 +2275,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, unsigned int flags = NLM_F_MULTI; int err; - err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL); if (!err) { if (tb[NDA_IFINDEX]) filter_idx = nla_get_u32(tb[NDA_IFINDEX]); @@ -2830,7 +2834,8 @@ static inline size_t neigh_nlmsg_size(void) + nla_total_size(4); /* NDA_PROBES */ } -static void __neigh_notify(struct neighbour *n, int type, int flags) +static void __neigh_notify(struct neighbour *n, int type, int flags, + u32 pid) { struct net *net = dev_net(n->dev); struct sk_buff *skb; @@ -2840,7 +2845,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) if (skb == NULL) goto errout; - err = neigh_fill_info(skb, n, 0, 0, type, flags); + err = neigh_fill_info(skb, n, pid, 0, type, flags); if (err < 0) { /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2856,7 +2861,7 @@ errout: void neigh_app_ns(struct neighbour *n) { - __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); + __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0); } EXPORT_SYMBOL(neigh_app_ns); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 652468ff65b7..ec18cbc756d2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -579,7 +579,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) int nsid, err; err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy); + rtnl_net_policy, NULL); if (err < 0) return err; if (!tb[NETNSA_NSID]) @@ -653,7 +653,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) int err, id; err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy); + rtnl_net_policy, NULL); if (err < 0) return err; if (tb[NETNSA_PID]) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 6ae56037bb13..029a61ac6cdd 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -71,27 +71,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) return 0; } -static void update_classid(struct cgroup_subsys_state *css, void *v) +static void cgrp_attach(struct cgroup_taskset *tset) { - struct css_task_iter it; + struct cgroup_subsys_state *css; struct task_struct *p; - css_task_iter_start(css, &it); - while ((p = css_task_iter_next(&it))) { + cgroup_taskset_for_each(p, css, tset) { task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, v); + iterate_fd(p->files, 0, update_classid_sock, + (void *)(unsigned long)css_cls_state(css)->classid); task_unlock(p); } - css_task_iter_end(&it); -} - -static void cgrp_attach(struct cgroup_taskset *tset) -{ - struct cgroup_subsys_state *css; - - cgroup_taskset_first(tset, &css); - update_classid(css, - (void *)(unsigned long)css_cls_state(css)->classid); } static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) @@ -103,12 +93,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, u64 value) { struct cgroup_cls_state *cs = css_cls_state(css); + struct css_task_iter it; + struct task_struct *p; cgroup_sk_alloc_disable(); cs->classid = (u32)value; - update_classid(css, (void *)(unsigned long)cs->classid); + css_task_iter_start(css, &it); + while ((p = css_task_iter_next(&it))) { + task_lock(p); + iterate_fd(p->files, 0, update_classid_sock, + (void *)(unsigned long)cs->classid); + task_unlock(p); + } + css_task_iter_end(&it); + return 0; } diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 0f9275ee5595..1c4810919a0a 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/module.h> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c4e84c558240..0ee5479528b5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1515,7 +1515,8 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; - if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0) + if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, + ifla_info_policy, NULL) < 0) return NULL; if (linfo[IFLA_INFO_KIND]) { @@ -1592,8 +1593,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { - + if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, + ifla_policy, NULL) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1640,9 +1641,10 @@ out: return skb->len; } -int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len) +int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, + struct netlink_ext_ack *exterr) { - return nla_parse(tb, IFLA_MAX, head, len, ifla_policy); + return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr); } EXPORT_SYMBOL(rtnl_nla_parse_ifla); @@ -2078,7 +2080,7 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, - ifla_vf_policy); + ifla_vf_policy, NULL); if (err < 0) goto errout; err = do_setvfinfo(dev, vfinfo); @@ -2106,7 +2108,7 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } err = nla_parse_nested(port, IFLA_PORT_MAX, attr, - ifla_port_policy); + ifla_port_policy, NULL); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { @@ -2126,7 +2128,8 @@ static int do_setlink(const struct sk_buff *skb, struct nlattr *port[IFLA_PORT_MAX+1]; err = nla_parse_nested(port, IFLA_PORT_MAX, - tb[IFLA_PORT_SELF], ifla_port_policy); + tb[IFLA_PORT_SELF], ifla_port_policy, + NULL); if (err < 0) goto errout; @@ -2170,7 +2173,7 @@ static int do_setlink(const struct sk_buff *skb, u32 xdp_flags = 0; err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], - ifla_xdp_policy); + ifla_xdp_policy, NULL); if (err < 0) goto errout; @@ -2219,7 +2222,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[IFLA_MAX+1]; char ifname[IFNAMSIZ]; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, NULL); if (err < 0) goto errout; @@ -2312,7 +2315,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[IFLA_MAX+1]; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, NULL); if (err < 0) return err; @@ -2441,7 +2444,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) #ifdef CONFIG_MODULES replay: #endif - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, NULL); if (err < 0) return err; @@ -2472,7 +2475,8 @@ replay: if (tb[IFLA_LINKINFO]) { err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, - tb[IFLA_LINKINFO], ifla_info_policy); + tb[IFLA_LINKINFO], ifla_info_policy, + NULL); if (err < 0) return err; } else @@ -2497,7 +2501,7 @@ replay: if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { err = nla_parse_nested(attr, ops->maxtype, linkinfo[IFLA_INFO_DATA], - ops->policy); + ops->policy, NULL); if (err < 0) return err; data = attr; @@ -2515,7 +2519,8 @@ replay: err = nla_parse_nested(slave_attr, m_ops->slave_maxtype, linkinfo[IFLA_INFO_SLAVE_DATA], - m_ops->slave_policy); + m_ops->slave_policy, + NULL); if (err < 0) return err; slave_data = slave_attr; @@ -2684,7 +2689,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) int err; u32 ext_filter_mask = 0; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, NULL); if (err < 0) return err; @@ -2734,7 +2739,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { + if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } @@ -2965,7 +2970,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) u16 vid; int err; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, NULL); if (err < 0) return err; @@ -3068,7 +3073,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, NULL); if (err < 0) return err; @@ -3203,8 +3208,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) == 0) { + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, NULL) == 0) { if (tb[IFLA_MASTER]) br_idx = nla_get_u32(tb[IFLA_MASTER]); } @@ -4046,7 +4051,8 @@ out: /* Process one rtnetlink message. */ -static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); rtnl_doit_func doit; @@ -4116,22 +4122,16 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_PRE_UP: - case NETDEV_POST_INIT: - case NETDEV_REGISTER: - case NETDEV_CHANGE: - case NETDEV_PRE_TYPE_CHANGE: - case NETDEV_GOING_DOWN: - case NETDEV_UNREGISTER: - case NETDEV_UNREGISTER_FINAL: - case NETDEV_RELEASE: - case NETDEV_JOIN: - case NETDEV_BONDING_INFO: + case NETDEV_REBOOT: + case NETDEV_CHANGENAME: + case NETDEV_FEAT_CHANGE: + case NETDEV_BONDING_FAILOVER: + case NETDEV_NOTIFY_PEERS: + case NETDEV_RESEND_IGMP: + case NETDEV_CHANGEINFODATA: + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; default: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; } return NOTIFY_DONE; @@ -4185,6 +4185,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL); rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 758f140b6bed..6bd2f8fb0476 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -20,9 +20,11 @@ #include <net/tcp.h> static siphash_key_t net_secret __read_mostly; +static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { + net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } #endif @@ -45,8 +47,25 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff) +static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +{ + const struct { + struct in6_addr saddr; + struct in6_addr daddr; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .daddr = *(struct in6_addr *)daddr, + }; + + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash(&combined, offsetofend(typeof(combined), daddr), + &ts_secret); +} + +u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport, u32 *tsoff) { const struct { struct in6_addr saddr; @@ -63,10 +82,10 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } -EXPORT_SYMBOL(secure_tcpv6_sequence_number); +EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) @@ -88,22 +107,29 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET +static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +{ + if (sysctl_tcp_timestamps != 1) + return 0; -/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), + return siphash_2u32((__force u32)saddr, (__force u32)daddr, + &ts_secret); +} + +/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ - -u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, u32 *tsoff) { u64 hash; net_secret_init(); hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cd4ba8c6b609..5d9a11eafbf5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3093,7 +3093,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, * containing the same amount of data. */ skb_walk_frags(head_skb, iter) { - if (skb_headlen(iter)) + if (skb_headlen(iter) && !iter->head_frag) goto normal; len -= iter->len; @@ -3694,6 +3694,15 @@ static void sock_rmem_free(struct sk_buff *skb) atomic_sub(skb->truesize, &sk->sk_rmem_alloc); } +static void skb_set_err_queue(struct sk_buff *skb) +{ + /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING. + * So, it is safe to (mis)use it to mark skbs on the error queue. + */ + skb->pkt_type = PACKET_OUTGOING; + BUILD_BUG_ON(PACKET_OUTGOING == 0); +} + /* * Note: We dont mem charge error packets (no sk_forward_alloc changes) */ @@ -3707,6 +3716,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_rmem_free; atomic_add(skb->truesize, &sk->sk_rmem_alloc); + skb_set_err_queue(skb); /* before exiting rcu section, make sure dst is refcounted */ skb_dst_force(skb); @@ -3783,16 +3793,20 @@ EXPORT_SYMBOL(skb_clone_sk); static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, - int tstype) + int tstype, + bool opt_stats) { struct sock_exterr_skb *serr; int err; + BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); + serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; + serr->opt_stats = opt_stats; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk->sk_protocol == IPPROTO_TCP && @@ -3833,7 +3847,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, */ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); } } @@ -3844,7 +3858,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; - bool tsonly; + bool tsonly, opt_stats = false; if (!sk) return; @@ -3857,9 +3871,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, #ifdef CONFIG_INET if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) + sk->sk_type == SOCK_STREAM) { skb = tcp_get_timestamping_opt_stats(sk); - else + opt_stats = true; + } else #endif skb = alloc_skb(0, GFP_ATOMIC); } else { @@ -3878,7 +3893,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, else skb->tstamp = ktime_get_real(); - __skb_complete_tx_timestamp(skb, sk, tstype); + __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); diff --git a/net/core/sock.c b/net/core/sock.c index a96d5f7a5734..a06bb7a2a689 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -247,12 +247,66 @@ static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { _sock_locks("k-clock-") }; +static const char *const af_family_rlock_key_strings[AF_MAX+1] = { + "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" , + "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK", + "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" , + "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" , + "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" , + "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" , + "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" , + "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" , + "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" , + "rlock-27" , "rlock-28" , "rlock-AF_CAN" , + "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" , + "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" , + "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" , + "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" , + "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX" +}; +static const char *const af_family_wlock_key_strings[AF_MAX+1] = { + "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" , + "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK", + "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" , + "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" , + "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" , + "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" , + "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" , + "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" , + "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" , + "wlock-27" , "wlock-28" , "wlock-AF_CAN" , + "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" , + "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" , + "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" , + "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" , + "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX" +}; +static const char *const af_family_elock_key_strings[AF_MAX+1] = { + "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" , + "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK", + "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" , + "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" , + "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" , + "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" , + "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" , + "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" , + "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" , + "elock-27" , "elock-28" , "elock-AF_CAN" , + "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" , + "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" , + "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" , + "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" , + "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX" +}; /* - * sk_callback_lock locking rules are per-address-family, + * sk_callback_lock and sk queues locking rules are per-address-family, * so split the lock classes by using a per-AF key: */ static struct lock_class_key af_callback_keys[AF_MAX]; +static struct lock_class_key af_rlock_keys[AF_MAX]; +static struct lock_class_key af_wlock_keys[AF_MAX]; +static struct lock_class_key af_elock_keys[AF_MAX]; static struct lock_class_key af_kern_callback_keys[AF_MAX]; /* Take into consideration the size of the struct sk_buff overhead in the @@ -1029,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, union { int val; + u64 val64; struct linger ling; struct timeval tm; } v; @@ -1259,6 +1314,40 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_incoming_cpu; break; + case SO_MEMINFO: + { + u32 meminfo[SK_MEMINFO_VARS]; + + if (get_user(len, optlen)) + return -EFAULT; + + sk_get_meminfo(sk, meminfo); + + len = min_t(unsigned int, len, sizeof(meminfo)); + if (copy_to_user(optval, &meminfo, len)) + return -EFAULT; + + goto lenout; + } + +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_INCOMING_NAPI_ID: + v.val = READ_ONCE(sk->sk_napi_id); + + /* aggregate non-NAPI IDs down to 0 */ + if (v.val < MIN_NAPI_ID) + v.val = 0; + + break; +#endif + + case SO_COOKIE: + lv = sizeof(u64); + if (len < lv) + return -EINVAL; + v.val64 = sock_gen_cookie(sk); + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -1442,6 +1531,11 @@ static void __sk_destruct(struct rcu_head *head) pr_debug("%s: optmem leakage (%d bytes) detected\n", __func__, atomic_read(&sk->sk_omem_alloc)); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); @@ -1478,6 +1572,27 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); +static void sk_init_common(struct sock *sk) +{ + skb_queue_head_init(&sk->sk_receive_queue); + skb_queue_head_init(&sk->sk_write_queue); + skb_queue_head_init(&sk->sk_error_queue); + + rwlock_init(&sk->sk_callback_lock); + lockdep_set_class_and_name(&sk->sk_receive_queue.lock, + af_rlock_keys + sk->sk_family, + af_family_rlock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_write_queue.lock, + af_wlock_keys + sk->sk_family, + af_family_wlock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_error_queue.lock, + af_elock_keys + sk->sk_family, + af_family_elock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family, + af_family_clock_key_strings[sk->sk_family]); +} + /** * sk_clone_lock - clone a socket, and lock its clone * @sk: the socket to clone @@ -1511,13 +1626,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ atomic_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); - skb_queue_head_init(&newsk->sk_receive_queue); - skb_queue_head_init(&newsk->sk_write_queue); - - rwlock_init(&newsk->sk_callback_lock); - lockdep_set_class_and_name(&newsk->sk_callback_lock, - af_callback_keys + newsk->sk_family, - af_family_clock_key_strings[newsk->sk_family]); + sk_init_common(newsk); newsk->sk_dst_cache = NULL; newsk->sk_dst_pending_confirm = 0; @@ -1528,7 +1637,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; sock_reset_flag(newsk, SOCK_DONE); - skb_queue_head_init(&newsk->sk_error_queue); filter = rcu_dereference_protected(newsk->sk_filter, 1); if (filter != NULL) @@ -1539,6 +1647,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) is_charged = sk_filter_charge(newsk, filter); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { + /* We need to make sure that we don't uncharge the new + * socket if we couldn't charge it in the first place + * as otherwise we uncharge the parent's filter. + */ + if (!is_charged) + RCU_INIT_POINTER(newsk->sk_filter, NULL); sk_free_unlock_clone(newsk); newsk = NULL; goto out; @@ -2455,10 +2569,7 @@ EXPORT_SYMBOL(sk_stop_timer); void sock_init_data(struct socket *sock, struct sock *sk) { - skb_queue_head_init(&sk->sk_receive_queue); - skb_queue_head_init(&sk->sk_write_queue); - skb_queue_head_init(&sk->sk_error_queue); - + sk_init_common(sk); sk->sk_send_head = NULL; init_timer(&sk->sk_timer); @@ -2510,7 +2621,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; - sk->sk_stamp = ktime_set(-1L, 0); + sk->sk_stamp = SK_DEFAULT_STAMP; #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; @@ -2787,15 +2898,25 @@ void sk_common_release(struct sock *sk) sk_refcnt_debug_release(sk); - if (sk->sk_frag.page) { - put_page(sk->sk_frag.page); - sk->sk_frag.page = NULL; - } - sock_put(sk); } EXPORT_SYMBOL(sk_common_release); +void sk_get_meminfo(const struct sock *sk, u32 *mem) +{ + memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); + + mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); + mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); + mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); + mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); +} + #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse { @@ -3136,3 +3257,14 @@ static int __init proto_init(void) subsys_initcall(proto_init); #endif /* PROC_FS */ + +#ifdef CONFIG_NET_RX_BUSY_POLL +bool sk_busy_loop_end(void *p, unsigned long start_time) +{ + struct sock *sk = p; + + return !skb_queue_empty(&sk->sk_receive_queue) || + sk_busy_loop_timeout(sk, start_time); +} +EXPORT_SYMBOL(sk_busy_loop_end); +#endif /* CONFIG_NET_RX_BUSY_POLL */ diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 6b10573cc9fa..217f4e3b82f6 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; -static u64 sock_gen_cookie(struct sock *sk) +u64 sock_gen_cookie(struct sock *sk) { while (1) { u64 res = atomic64_read(&sk->sk_cookie); @@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) { u32 mem[SK_MEMINFO_VARS]; - mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; - mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; - mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; - mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; - mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + sk_get_meminfo(sk, mem); return nla_put(skb, attrtype, sizeof(mem), &mem); } @@ -246,7 +238,8 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { int ret; diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 9a1a352fd1eb..eed1ebf7f29d 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -13,9 +13,9 @@ static DEFINE_SPINLOCK(reuseport_lock); -static struct sock_reuseport *__reuseport_alloc(u16 max_socks) +static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) { - size_t size = sizeof(struct sock_reuseport) + + unsigned int size = sizeof(struct sock_reuseport) + sizeof(struct sock *) * max_socks; struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 4ead336e14ea..7f9cc400eca0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "busy_read", .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, #endif #ifdef CONFIG_NET_SCHED diff --git a/net/core/utils.c b/net/core/utils.c index 6592d7bbed39..d758880c09a7 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(net_ratelimit); __be32 in_aton(const char *str) { - unsigned long l; + unsigned int l; unsigned int val; int i; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 3202d75329b5..3f5a5f710576 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -245,8 +245,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, - tb[DCB_ATTR_PFC_CFG], - dcbnl_pfc_up_nest); + tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -304,7 +303,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP], - dcbnl_cap_nest); + dcbnl_cap_nest, NULL); if (ret) return ret; @@ -348,7 +347,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], - dcbnl_numtcs_nest); + dcbnl_numtcs_nest, NULL); if (ret) return ret; @@ -393,7 +392,7 @@ static int dcbnl_setnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], - dcbnl_numtcs_nest); + dcbnl_numtcs_nest, NULL); if (ret) return ret; @@ -452,7 +451,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh, return -EINVAL; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], - dcbnl_app_nest); + dcbnl_app_nest, NULL); if (ret) return ret; @@ -520,7 +519,7 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh, return -EINVAL; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], - dcbnl_app_nest); + dcbnl_app_nest, NULL); if (ret) return ret; @@ -577,8 +576,8 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->getpgbwgcfgrx) return -EOPNOTSUPP; - ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, - tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], + dcbnl_pg_nest, NULL); if (ret) return ret; @@ -597,8 +596,8 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, data = pg_tb[DCB_PG_ATTR_TC_ALL]; else data = pg_tb[i]; - ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, - data, dcbnl_tc_param_nest); + ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, data, + dcbnl_tc_param_nest, NULL); if (ret) goto err_pg; @@ -735,8 +734,7 @@ static int dcbnl_setpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, - tb[DCB_ATTR_PFC_CFG], - dcbnl_pfc_up_nest); + tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -791,8 +789,8 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->setpgbwgcfgrx) return -EOPNOTSUPP; - ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, - tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], + dcbnl_pg_nest, NULL); if (ret) return ret; @@ -801,7 +799,7 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, continue; ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, - pg_tb[i], dcbnl_tc_param_nest); + pg_tb[i], dcbnl_tc_param_nest, NULL); if (ret) return ret; @@ -889,8 +887,8 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->getbcncfg) return -EOPNOTSUPP; - ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, - tb[DCB_ATTR_BCN], dcbnl_bcn_nest); + ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], + dcbnl_bcn_nest, NULL); if (ret) return ret; @@ -948,9 +946,8 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->setbcnrp) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, - tb[DCB_ATTR_BCN], - dcbnl_pfc_up_nest); + ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], + dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -1424,8 +1421,8 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_IEEE]) return -EINVAL; - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, - tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE], + dcbnl_ieee_policy, NULL); if (err) return err; @@ -1508,8 +1505,8 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_IEEE]) return -EINVAL; - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, - tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE], + dcbnl_ieee_policy, NULL); if (err) return err; @@ -1581,8 +1578,8 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_FEATCFG]) return -EINVAL; - ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], - dcbnl_featcfg_nest); + ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, + tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL); if (ret) return ret; @@ -1625,8 +1622,8 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_FEATCFG]) return -EINVAL; - ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], - dcbnl_featcfg_nest); + ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, + tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL); if (ret) goto err; @@ -1715,7 +1712,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, - dcbnl_rtnl_policy); + dcbnl_rtnl_policy, NULL); if (ret < 0) return ret; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7de5b40a5d0d..9afa2a5030b2 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -132,6 +132,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include <net/neighbour.h> #include <net/dst.h> #include <net/fib_rules.h> +#include <net/tcp.h> #include <net/dn.h> #include <net/dn_nsp.h> #include <net/dn_dev.h> @@ -1469,18 +1470,18 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us case DSO_NODELAY: if (optlen != sizeof(int)) return -EINVAL; - if (scp->nonagle == 2) + if (scp->nonagle == TCP_NAGLE_CORK) return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : 1; + scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF; /* if (scp->nonagle == 1) { Push pending frames } */ break; case DSO_CORK: if (optlen != sizeof(int)) return -EINVAL; - if (scp->nonagle == 1) + if (scp->nonagle == TCP_NAGLE_OFF) return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : 2; + scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK; /* if (scp->nonagle == 0) { Push pending frames } */ break; @@ -1608,14 +1609,14 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us case DSO_NODELAY: if (r_len > sizeof(int)) r_len = sizeof(int); - val = (scp->nonagle == 1); + val = (scp->nonagle == TCP_NAGLE_OFF); r_data = &val; break; case DSO_CORK: if (r_len > sizeof(int)) r_len = sizeof(int); - val = (scp->nonagle == 2); + val = (scp->nonagle == TCP_NAGLE_CORK); r_data = &val; break; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 8fdd9f492b0e..e65f1be44e8e 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -581,7 +581,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) goto errout; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, NULL); if (err < 0) goto errout; @@ -625,7 +625,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, NULL); if (err < 0) return err; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 7af0ba6157a1..34663bf8aa6d 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -515,7 +515,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, + NULL); if (err < 0) return err; @@ -540,7 +541,8 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, + NULL); if (err < 0) return err; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b1dc096d22f8..2d7097bbc666 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1654,7 +1654,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy, NULL); if (err < 0) return err; diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 85f2fdc360c2..c8bf5136a72b 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -96,7 +96,7 @@ static unsigned int dnrmg_hook(void *priv, } -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0) static inline void dnrmg_receive_user_skb(struct sk_buff *skb) { diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 9649238eef40..aa21f49f1215 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -6,7 +6,7 @@ config HAVE_NET_DSA config NET_DSA tristate "Distributed Switch Architecture" - depends on HAVE_NET_DSA + depends on HAVE_NET_DSA && MAY_USE_DEVLINK select NET_SWITCHDEV select PHYLIB ---help--- @@ -31,4 +31,6 @@ config NET_DSA_TAG_TRAILER config NET_DSA_TAG_QCA bool +config NET_DSA_TAG_MTK + bool endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 31d343796251..11a082d7e103 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,6 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o slave.o dsa2.o switch.o +dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o @@ -8,3 +8,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index b6d4f6a23f06..e117047174fc 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -14,15 +14,17 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/module.h> -#include <net/dsa.h> #include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_platform.h> #include <linux/of_net.h> #include <linux/of_gpio.h> +#include <linux/netdevice.h> #include <linux/sysfs.h> #include <linux/phy_fixed.h> #include <linux/gpio/consumer.h> +#include <linux/etherdevice.h> +#include <net/dsa.h> #include "dsa_priv.h" static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, @@ -53,62 +55,12 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #ifdef CONFIG_NET_DSA_TAG_QCA [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, #endif +#ifdef CONFIG_NET_DSA_TAG_MTK + [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops, +#endif [DSA_TAG_PROTO_NONE] = &none_ops, }; -/* switch driver registration ***********************************************/ -static DEFINE_MUTEX(dsa_switch_drivers_mutex); -static LIST_HEAD(dsa_switch_drivers); - -void register_switch_driver(struct dsa_switch_driver *drv) -{ - mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); - mutex_unlock(&dsa_switch_drivers_mutex); -} -EXPORT_SYMBOL_GPL(register_switch_driver); - -void unregister_switch_driver(struct dsa_switch_driver *drv) -{ - mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); - mutex_unlock(&dsa_switch_drivers_mutex); -} -EXPORT_SYMBOL_GPL(unregister_switch_driver); - -static const struct dsa_switch_ops * -dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, - const char **_name, void **priv) -{ - const struct dsa_switch_ops *ret; - struct list_head *list; - const char *name; - - ret = NULL; - name = NULL; - - mutex_lock(&dsa_switch_drivers_mutex); - list_for_each(list, &dsa_switch_drivers) { - const struct dsa_switch_ops *ops; - struct dsa_switch_driver *drv; - - drv = list_entry(list, struct dsa_switch_driver, list); - ops = drv->ops; - - name = ops->probe(parent, host_dev, sw_addr, priv); - if (name != NULL) { - ret = ops; - break; - } - } - mutex_unlock(&dsa_switch_drivers_mutex); - - *_name = name; - - return ret; -} - -/* basic switch operations **************************************************/ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, struct dsa_port *dport, int port) { @@ -140,23 +92,6 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, return 0; } -static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev) -{ - struct dsa_port *dport; - int ret, port; - - for (port = 0; port < ds->num_ports; port++) { - if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) - continue; - - dport = &ds->ports[port]; - ret = dsa_cpu_dsa_setup(ds, dev, dport, port); - if (ret) - return ret; - } - return 0; -} - const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) { const struct dsa_device_ops *ops; @@ -206,168 +141,6 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) master->ethtool_ops = ds->dst->master_orig_ethtool_ops; } -static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) -{ - const struct dsa_switch_ops *ops = ds->ops; - struct dsa_switch_tree *dst = ds->dst; - struct dsa_chip_data *cd = ds->cd; - bool valid_name_found = false; - int index = ds->index; - int i, ret; - - /* - * Validate supplied switch configuration. - */ - for (i = 0; i < ds->num_ports; i++) { - char *name; - - name = cd->port_names[i]; - if (name == NULL) - continue; - - if (!strcmp(name, "cpu")) { - if (dst->cpu_switch) { - netdev_err(dst->master_netdev, - "multiple cpu ports?!\n"); - return -EINVAL; - } - dst->cpu_switch = ds; - dst->cpu_port = i; - ds->cpu_port_mask |= 1 << i; - } else if (!strcmp(name, "dsa")) { - ds->dsa_port_mask |= 1 << i; - } else { - ds->enabled_port_mask |= 1 << i; - } - valid_name_found = true; - } - - if (!valid_name_found && i == ds->num_ports) - return -EINVAL; - - /* Make the built-in MII bus mask match the number of ports, - * switch drivers can override this later - */ - ds->phys_mii_mask = ds->enabled_port_mask; - - /* - * If the CPU connects to this switch, set the switch tree - * tagging protocol to the preferred tagging format of this - * switch. - */ - if (dst->cpu_switch == ds) { - enum dsa_tag_protocol tag_protocol; - - tag_protocol = ops->get_tag_protocol(ds); - dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(dst->tag_ops)) - return PTR_ERR(dst->tag_ops); - - dst->rcv = dst->tag_ops->rcv; - } - - memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); - - /* - * Do basic register setup. - */ - ret = ops->setup(ds); - if (ret < 0) - return ret; - - ret = dsa_switch_register_notifier(ds); - if (ret) - return ret; - - if (ops->set_addr) { - ret = ops->set_addr(ds, dst->master_netdev->dev_addr); - if (ret < 0) - return ret; - } - - if (!ds->slave_mii_bus && ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(parent); - if (!ds->slave_mii_bus) - return -ENOMEM; - dsa_slave_mii_bus_init(ds); - - ret = mdiobus_register(ds->slave_mii_bus); - if (ret < 0) - return ret; - } - - /* - * Create network devices for physical switch ports. - */ - for (i = 0; i < ds->num_ports; i++) { - ds->ports[i].dn = cd->port_dn[i]; - - if (!(ds->enabled_port_mask & (1 << i))) - continue; - - ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); - if (ret < 0) - netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", - index, i, cd->port_names[i], ret); - } - - /* Perform configuration of the CPU and DSA ports */ - ret = dsa_cpu_dsa_setups(ds, parent); - if (ret < 0) - netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n", - index); - - ret = dsa_cpu_port_ethtool_setup(ds); - if (ret) - return ret; - - return 0; -} - -static struct dsa_switch * -dsa_switch_setup(struct dsa_switch_tree *dst, int index, - struct device *parent, struct device *host_dev) -{ - struct dsa_chip_data *cd = dst->pd->chip + index; - const struct dsa_switch_ops *ops; - struct dsa_switch *ds; - int ret; - const char *name; - void *priv; - - /* - * Probe for switch model. - */ - ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (!ops) { - netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", - index); - return ERR_PTR(-EINVAL); - } - netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", - index, name); - - - /* - * Allocate and initialise switch state. - */ - ds = dsa_switch_alloc(parent, DSA_MAX_PORTS); - if (!ds) - return ERR_PTR(-ENOMEM); - - ds->dst = dst; - ds->index = index; - ds->cd = cd; - ds->ops = ops; - ds->priv = priv; - - ret = dsa_switch_setup_one(ds, parent); - if (ret) - return ERR_PTR(ret); - - return ds; -} - void dsa_cpu_dsa_destroy(struct dsa_port *port) { struct device_node *port_dn = port->dn; @@ -376,86 +149,6 @@ void dsa_cpu_dsa_destroy(struct dsa_port *port) of_phy_deregister_fixed_link(port_dn); } -static void dsa_switch_destroy(struct dsa_switch *ds) -{ - int port; - - /* Destroy network devices for physical switch ports. */ - for (port = 0; port < ds->num_ports; port++) { - if (!(ds->enabled_port_mask & (1 << port))) - continue; - - if (!ds->ports[port].netdev) - continue; - - dsa_slave_destroy(ds->ports[port].netdev); - } - - /* Disable configuration of the CPU and DSA ports */ - for (port = 0; port < ds->num_ports; port++) { - if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) - continue; - dsa_cpu_dsa_destroy(&ds->ports[port]); - - /* Clearing a bit which is not set does no harm */ - ds->cpu_port_mask |= ~(1 << port); - ds->dsa_port_mask |= ~(1 << port); - } - - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_unregister(ds->slave_mii_bus); - - dsa_switch_unregister_notifier(ds); -} - -#ifdef CONFIG_PM_SLEEP -int dsa_switch_suspend(struct dsa_switch *ds) -{ - int i, ret = 0; - - /* Suspend slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) - continue; - - ret = dsa_slave_suspend(ds->ports[i].netdev); - if (ret) - return ret; - } - - if (ds->ops->suspend) - ret = ds->ops->suspend(ds); - - return ret; -} -EXPORT_SYMBOL_GPL(dsa_switch_suspend); - -int dsa_switch_resume(struct dsa_switch *ds) -{ - int i, ret = 0; - - if (ds->ops->resume) - ret = ds->ops->resume(ds); - - if (ret) - return ret; - - /* Resume slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) - continue; - - ret = dsa_slave_resume(ds->ports[i].netdev); - if (ret) - return ret; - } - - return 0; -} -EXPORT_SYMBOL_GPL(dsa_switch_resume); -#endif - -/* platform driver init and cleanup *****************************************/ static int dev_is_class(struct device *dev, void *class) { if (dev->class != NULL && !strcmp(dev->class->name, class)) @@ -474,24 +167,6 @@ static struct device *dev_find_class(struct device *parent, char *class) return device_find_child(parent, class, dev_is_class); } -struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) -{ - struct device *d; - - d = dev_find_class(dev, "mdio_bus"); - if (d != NULL) { - struct mii_bus *bus; - - bus = to_mii_bus(d); - put_device(d); - - return bus; - } - - return NULL; -} -EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); - struct net_device *dsa_dev_to_net_device(struct device *dev) { struct device *d; @@ -511,456 +186,43 @@ struct net_device *dsa_dev_to_net_device(struct device *dev) } EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); -#ifdef CONFIG_OF -static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, - struct dsa_chip_data *cd, - int chip_index, int port_index, - struct device_node *link) -{ - const __be32 *reg; - int link_sw_addr; - struct device_node *parent_sw; - int len; - - parent_sw = of_get_parent(link); - if (!parent_sw) - return -EINVAL; - - reg = of_get_property(parent_sw, "reg", &len); - if (!reg || (len != sizeof(*reg) * 2)) - return -EINVAL; - - /* - * Get the destination switch number from the second field of its 'reg' - * property, i.e. for "reg = <0x19 1>" sw_addr is '1'. - */ - link_sw_addr = be32_to_cpup(reg + 1); - - if (link_sw_addr >= pd->nr_chips) - return -EINVAL; - - cd->rtable[link_sw_addr] = port_index; - - return 0; -} - -static int dsa_of_probe_links(struct dsa_platform_data *pd, - struct dsa_chip_data *cd, - int chip_index, int port_index, - struct device_node *port, - const char *port_name) -{ - struct device_node *link; - int link_index; - int ret; - - for (link_index = 0;; link_index++) { - link = of_parse_phandle(port, "link", link_index); - if (!link) - break; - - if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) { - ret = dsa_of_setup_routing_table(pd, cd, chip_index, - port_index, link); - if (ret) - return ret; - } - } - return 0; -} - -static void dsa_of_free_platform_data(struct dsa_platform_data *pd) -{ - int i; - int port_index; - - for (i = 0; i < pd->nr_chips; i++) { - port_index = 0; - while (port_index < DSA_MAX_PORTS) { - kfree(pd->chip[i].port_names[port_index]); - port_index++; - } - - /* Drop our reference to the MDIO bus device */ - if (pd->chip[i].host_dev) - put_device(pd->chip[i].host_dev); - } - kfree(pd->chip); -} - -static int dsa_of_probe(struct device *dev) -{ - struct device_node *np = dev->of_node; - struct device_node *child, *mdio, *ethernet, *port; - struct mii_bus *mdio_bus, *mdio_bus_switch; - struct net_device *ethernet_dev; - struct dsa_platform_data *pd; - struct dsa_chip_data *cd; - const char *port_name; - int chip_index, port_index; - const unsigned int *sw_addr, *port_reg; - u32 eeprom_len; - int ret; - - mdio = of_parse_phandle(np, "dsa,mii-bus", 0); - if (!mdio) - return -EINVAL; - - mdio_bus = of_mdio_find_bus(mdio); - if (!mdio_bus) - return -EPROBE_DEFER; - - ethernet = of_parse_phandle(np, "dsa,ethernet", 0); - if (!ethernet) { - ret = -EINVAL; - goto out_put_mdio; - } - - ethernet_dev = of_find_net_device_by_node(ethernet); - if (!ethernet_dev) { - ret = -EPROBE_DEFER; - goto out_put_mdio; - } - - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ret = -ENOMEM; - goto out_put_ethernet; - } - - dev->platform_data = pd; - pd->of_netdev = ethernet_dev; - pd->nr_chips = of_get_available_child_count(np); - if (pd->nr_chips > DSA_MAX_SWITCHES) - pd->nr_chips = DSA_MAX_SWITCHES; - - pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data), - GFP_KERNEL); - if (!pd->chip) { - ret = -ENOMEM; - goto out_free; - } - - chip_index = -1; - for_each_available_child_of_node(np, child) { - int i; - - chip_index++; - cd = &pd->chip[chip_index]; - - cd->of_node = child; - - /* Initialize the routing table */ - for (i = 0; i < DSA_MAX_SWITCHES; ++i) - cd->rtable[i] = DSA_RTABLE_NONE; - - /* When assigning the host device, increment its refcount */ - cd->host_dev = get_device(&mdio_bus->dev); - - sw_addr = of_get_property(child, "reg", NULL); - if (!sw_addr) - continue; - - cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr >= PHY_MAX_ADDR) - continue; - - if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) - cd->eeprom_len = eeprom_len; - - mdio = of_parse_phandle(child, "mii-bus", 0); - if (mdio) { - mdio_bus_switch = of_mdio_find_bus(mdio); - if (!mdio_bus_switch) { - ret = -EPROBE_DEFER; - goto out_free_chip; - } - - /* Drop the mdio_bus device ref, replacing the host - * device with the mdio_bus_switch device, keeping - * the refcount from of_mdio_find_bus() above. - */ - put_device(cd->host_dev); - cd->host_dev = &mdio_bus_switch->dev; - } - - for_each_available_child_of_node(child, port) { - port_reg = of_get_property(port, "reg", NULL); - if (!port_reg) - continue; - - port_index = be32_to_cpup(port_reg); - if (port_index >= DSA_MAX_PORTS) - break; - - port_name = of_get_property(port, "label", NULL); - if (!port_name) - continue; - - cd->port_dn[port_index] = port; - - cd->port_names[port_index] = kstrdup(port_name, - GFP_KERNEL); - if (!cd->port_names[port_index]) { - ret = -ENOMEM; - goto out_free_chip; - } - - ret = dsa_of_probe_links(pd, cd, chip_index, - port_index, port, port_name); - if (ret) - goto out_free_chip; - - } - } - - /* The individual chips hold their own refcount on the mdio bus, - * so drop ours */ - put_device(&mdio_bus->dev); - - return 0; - -out_free_chip: - dsa_of_free_platform_data(pd); -out_free: - kfree(pd); - dev->platform_data = NULL; -out_put_ethernet: - put_device(ðernet_dev->dev); -out_put_mdio: - put_device(&mdio_bus->dev); - return ret; -} - -static void dsa_of_remove(struct device *dev) -{ - struct dsa_platform_data *pd = dev->platform_data; - - if (!dev->of_node) - return; - - dsa_of_free_platform_data(pd); - put_device(&pd->of_netdev->dev); - kfree(pd); -} -#else -static inline int dsa_of_probe(struct device *dev) -{ - return 0; -} - -static inline void dsa_of_remove(struct device *dev) -{ -} -#endif - -static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, - struct device *parent, struct dsa_platform_data *pd) -{ - int i; - unsigned configured = 0; - - dst->pd = pd; - dst->master_netdev = dev; - dst->cpu_port = -1; - - for (i = 0; i < pd->nr_chips; i++) { - struct dsa_switch *ds; - - ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev); - if (IS_ERR(ds)) { - netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", - i, PTR_ERR(ds)); - continue; - } - - dst->ds[i] = ds; - - ++configured; - } - - /* - * If no switch was found, exit cleanly - */ - if (!configured) - return -EPROBE_DEFER; - - /* - * If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. - */ - wmb(); - dev->dsa_ptr = (void *)dst; - - return 0; -} - -static int dsa_probe(struct platform_device *pdev) -{ - struct dsa_platform_data *pd = pdev->dev.platform_data; - struct net_device *dev; - struct dsa_switch_tree *dst; - int ret; - - if (pdev->dev.of_node) { - ret = dsa_of_probe(&pdev->dev); - if (ret) - return ret; - - pd = pdev->dev.platform_data; - } - - if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL)) - return -EINVAL; - - if (pd->of_netdev) { - dev = pd->of_netdev; - dev_hold(dev); - } else { - dev = dsa_dev_to_net_device(pd->netdev); - } - if (dev == NULL) { - ret = -EPROBE_DEFER; - goto out; - } - - if (dev->dsa_ptr != NULL) { - dev_put(dev); - ret = -EEXIST; - goto out; - } - - dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL); - if (dst == NULL) { - dev_put(dev); - ret = -ENOMEM; - goto out; - } - - platform_set_drvdata(pdev, dst); - - ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); - if (ret) { - dev_put(dev); - goto out; - } - - return 0; - -out: - dsa_of_remove(&pdev->dev); - - return ret; -} - -static void dsa_remove_dst(struct dsa_switch_tree *dst) -{ - int i; - - dst->master_netdev->dsa_ptr = NULL; - - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds) - dsa_switch_destroy(ds); - } - - dsa_cpu_port_ethtool_restore(dst->cpu_switch); - - dev_put(dst->master_netdev); -} - -static int dsa_remove(struct platform_device *pdev) -{ - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - - dsa_remove_dst(dst); - dsa_of_remove(&pdev->dev); - - return 0; -} - -static void dsa_shutdown(struct platform_device *pdev) -{ -} - static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; + struct sk_buff *nskb = NULL; if (unlikely(dst == NULL)) { kfree_skb(skb); return 0; } - return dst->rcv(skb, dev, pt, orig_dev); -} - -static struct packet_type dsa_pack_type __read_mostly = { - .type = cpu_to_be16(ETH_P_XDSA), - .func = dsa_switch_rcv, -}; - -#ifdef CONFIG_PM_SLEEP -static int dsa_suspend(struct device *d) -{ - struct platform_device *pdev = to_platform_device(d); - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - int i, ret = 0; - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return 0; - if (ds != NULL) - ret = dsa_switch_suspend(ds); + nskb = dst->rcv(skb, dev, pt, orig_dev); + if (!nskb) { + kfree_skb(skb); + return 0; } - return ret; -} - -static int dsa_resume(struct device *d) -{ - struct platform_device *pdev = to_platform_device(d); - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - int i, ret = 0; + skb = nskb; + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, skb->dev); - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; - if (ds != NULL) - ret = dsa_switch_resume(ds); - } + netif_receive_skb(skb); - return ret; + return 0; } -#endif - -static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); -static const struct of_device_id dsa_of_match_table[] = { - { .compatible = "marvell,dsa", }, - {} -}; -MODULE_DEVICE_TABLE(of, dsa_of_match_table); - -static struct platform_driver dsa_driver = { - .probe = dsa_probe, - .remove = dsa_remove, - .shutdown = dsa_shutdown, - .driver = { - .name = "dsa", - .of_match_table = dsa_of_match_table, - .pm = &dsa_pm_ops, - }, +static struct packet_type dsa_pack_type __read_mostly = { + .type = cpu_to_be16(ETH_P_XDSA), + .func = dsa_switch_rcv, }; static int __init dsa_init_module(void) @@ -971,7 +233,7 @@ static int __init dsa_init_module(void) if (rc) return rc; - rc = platform_driver_register(&dsa_driver); + rc = dsa_legacy_register(); if (rc) return rc; @@ -985,7 +247,7 @@ static void __exit dsa_cleanup_module(void) { dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); - platform_driver_unregister(&dsa_driver); + dsa_legacy_unregister(); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 737be6470c7f..033b3bfb63dc 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -13,16 +13,20 @@ #include <linux/device.h> #include <linux/err.h> #include <linux/list.h> +#include <linux/netdevice.h> #include <linux/slab.h> #include <linux/rtnetlink.h> -#include <net/dsa.h> #include <linux/of.h> #include <linux/of_net.h> +#include <net/dsa.h> #include "dsa_priv.h" static LIST_HEAD(dsa_switch_trees); static DEFINE_MUTEX(dsa2_mutex); +static const struct devlink_ops dsa_devlink_ops = { +}; + static struct dsa_switch_tree *dsa_get_dst(u32 tree) { struct dsa_switch_tree *dst; @@ -222,12 +226,18 @@ static int dsa_dsa_port_apply(struct dsa_port *port, u32 index, return err; } - return 0; + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + + return devlink_port_register(ds->devlink, + &ds->ports[index].devlink_port, + index); } static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); dsa_cpu_dsa_destroy(port); } @@ -245,12 +255,17 @@ static int dsa_cpu_port_apply(struct dsa_port *port, u32 index, ds->cpu_port_mask |= BIT(index); - return 0; + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, + index); + return err; } static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); dsa_cpu_dsa_destroy(port); ds->cpu_port_mask &= ~BIT(index); @@ -275,12 +290,23 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index, return err; } + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, + index); + if (err) + return err; + + devlink_port_type_eth_set(&ds->ports[index].devlink_port, + ds->ports[index].netdev); + return 0; } static void dsa_user_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); if (ds->ports[index].netdev) { dsa_slave_destroy(ds->ports[index].netdev); ds->ports[index].netdev = NULL; @@ -301,6 +327,17 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) */ ds->phys_mii_mask = ds->enabled_port_mask; + /* Add the switch to devlink before calling setup, so that setup can + * add dpipe tables + */ + ds->devlink = devlink_alloc(&dsa_devlink_ops, 0); + if (!ds->devlink) + return -ENOMEM; + + err = devlink_register(ds->devlink, ds->dev); + if (err) + return err; + err = ds->ops->setup(ds); if (err < 0) return err; @@ -381,6 +418,13 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) mdiobus_unregister(ds->slave_mii_bus); dsa_switch_unregister_notifier(ds); + + if (ds->devlink) { + devlink_unregister(ds->devlink); + devlink_free(ds->devlink); + ds->devlink = NULL; + } + } static int dsa_dst_apply(struct dsa_switch_tree *dst) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 0706a511244e..ab397c07880f 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -17,8 +17,9 @@ struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); - int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev); }; struct dsa_slave_priv { @@ -54,6 +55,10 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds); void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds); +/* legacy.c */ +int dsa_legacy_register(void); +void dsa_legacy_unregister(void); + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); @@ -85,4 +90,7 @@ extern const struct dsa_device_ops brcm_netdev_ops; /* tag_qca.c */ extern const struct dsa_device_ops qca_netdev_ops; +/* tag_mtk.c */ +extern const struct dsa_device_ops mtk_netdev_ops; + #endif diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c new file mode 100644 index 000000000000..ad345c8b0b06 --- /dev/null +++ b/net/dsa/legacy.c @@ -0,0 +1,818 @@ +/* + * net/dsa/legacy.c - Hardware switch handling + * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/device.h> +#include <linux/list.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_platform.h> +#include <linux/of_net.h> +#include <linux/netdevice.h> +#include <linux/sysfs.h> +#include <linux/phy_fixed.h> +#include <linux/etherdevice.h> +#include <net/dsa.h> +#include "dsa_priv.h" + +/* switch driver registration ***********************************************/ +static DEFINE_MUTEX(dsa_switch_drivers_mutex); +static LIST_HEAD(dsa_switch_drivers); + +void register_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_add_tail(&drv->list, &dsa_switch_drivers); + mutex_unlock(&dsa_switch_drivers_mutex); +} +EXPORT_SYMBOL_GPL(register_switch_driver); + +void unregister_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_del_init(&drv->list); + mutex_unlock(&dsa_switch_drivers_mutex); +} +EXPORT_SYMBOL_GPL(unregister_switch_driver); + +static const struct dsa_switch_ops * +dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, + const char **_name, void **priv) +{ + const struct dsa_switch_ops *ret; + struct list_head *list; + const char *name; + + ret = NULL; + name = NULL; + + mutex_lock(&dsa_switch_drivers_mutex); + list_for_each(list, &dsa_switch_drivers) { + const struct dsa_switch_ops *ops; + struct dsa_switch_driver *drv; + + drv = list_entry(list, struct dsa_switch_driver, list); + ops = drv->ops; + + name = ops->probe(parent, host_dev, sw_addr, priv); + if (name != NULL) { + ret = ops; + break; + } + } + mutex_unlock(&dsa_switch_drivers_mutex); + + *_name = name; + + return ret; +} + +/* basic switch operations **************************************************/ +static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev) +{ + struct dsa_port *dport; + int ret, port; + + for (port = 0; port < ds->num_ports; port++) { + if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) + continue; + + dport = &ds->ports[port]; + ret = dsa_cpu_dsa_setup(ds, dev, dport, port); + if (ret) + return ret; + } + return 0; +} + +static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) +{ + const struct dsa_switch_ops *ops = ds->ops; + struct dsa_switch_tree *dst = ds->dst; + struct dsa_chip_data *cd = ds->cd; + bool valid_name_found = false; + int index = ds->index; + int i, ret; + + /* + * Validate supplied switch configuration. + */ + for (i = 0; i < ds->num_ports; i++) { + char *name; + + name = cd->port_names[i]; + if (name == NULL) + continue; + + if (!strcmp(name, "cpu")) { + if (dst->cpu_switch) { + netdev_err(dst->master_netdev, + "multiple cpu ports?!\n"); + return -EINVAL; + } + dst->cpu_switch = ds; + dst->cpu_port = i; + ds->cpu_port_mask |= 1 << i; + } else if (!strcmp(name, "dsa")) { + ds->dsa_port_mask |= 1 << i; + } else { + ds->enabled_port_mask |= 1 << i; + } + valid_name_found = true; + } + + if (!valid_name_found && i == ds->num_ports) + return -EINVAL; + + /* Make the built-in MII bus mask match the number of ports, + * switch drivers can override this later + */ + ds->phys_mii_mask = ds->enabled_port_mask; + + /* + * If the CPU connects to this switch, set the switch tree + * tagging protocol to the preferred tagging format of this + * switch. + */ + if (dst->cpu_switch == ds) { + enum dsa_tag_protocol tag_protocol; + + tag_protocol = ops->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); + if (IS_ERR(dst->tag_ops)) + return PTR_ERR(dst->tag_ops); + + dst->rcv = dst->tag_ops->rcv; + } + + memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); + + /* + * Do basic register setup. + */ + ret = ops->setup(ds); + if (ret < 0) + return ret; + + ret = dsa_switch_register_notifier(ds); + if (ret) + return ret; + + if (ops->set_addr) { + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); + if (ret < 0) + return ret; + } + + if (!ds->slave_mii_bus && ops->phy_read) { + ds->slave_mii_bus = devm_mdiobus_alloc(parent); + if (!ds->slave_mii_bus) + return -ENOMEM; + dsa_slave_mii_bus_init(ds); + + ret = mdiobus_register(ds->slave_mii_bus); + if (ret < 0) + return ret; + } + + /* + * Create network devices for physical switch ports. + */ + for (i = 0; i < ds->num_ports; i++) { + ds->ports[i].dn = cd->port_dn[i]; + + if (!(ds->enabled_port_mask & (1 << i))) + continue; + + ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); + if (ret < 0) + netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", + index, i, cd->port_names[i], ret); + } + + /* Perform configuration of the CPU and DSA ports */ + ret = dsa_cpu_dsa_setups(ds, parent); + if (ret < 0) + netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n", + index); + + ret = dsa_cpu_port_ethtool_setup(ds); + if (ret) + return ret; + + return 0; +} + +static struct dsa_switch * +dsa_switch_setup(struct dsa_switch_tree *dst, int index, + struct device *parent, struct device *host_dev) +{ + struct dsa_chip_data *cd = dst->pd->chip + index; + const struct dsa_switch_ops *ops; + struct dsa_switch *ds; + int ret; + const char *name; + void *priv; + + /* + * Probe for switch model. + */ + ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); + if (!ops) { + netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", + index); + return ERR_PTR(-EINVAL); + } + netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", + index, name); + + + /* + * Allocate and initialise switch state. + */ + ds = dsa_switch_alloc(parent, DSA_MAX_PORTS); + if (!ds) + return ERR_PTR(-ENOMEM); + + ds->dst = dst; + ds->index = index; + ds->cd = cd; + ds->ops = ops; + ds->priv = priv; + + ret = dsa_switch_setup_one(ds, parent); + if (ret) + return ERR_PTR(ret); + + return ds; +} + +static void dsa_switch_destroy(struct dsa_switch *ds) +{ + int port; + + /* Destroy network devices for physical switch ports. */ + for (port = 0; port < ds->num_ports; port++) { + if (!(ds->enabled_port_mask & (1 << port))) + continue; + + if (!ds->ports[port].netdev) + continue; + + dsa_slave_destroy(ds->ports[port].netdev); + } + + /* Disable configuration of the CPU and DSA ports */ + for (port = 0; port < ds->num_ports; port++) { + if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) + continue; + dsa_cpu_dsa_destroy(&ds->ports[port]); + + /* Clearing a bit which is not set does no harm */ + ds->cpu_port_mask |= ~(1 << port); + ds->dsa_port_mask |= ~(1 << port); + } + + if (ds->slave_mii_bus && ds->ops->phy_read) + mdiobus_unregister(ds->slave_mii_bus); + + dsa_switch_unregister_notifier(ds); +} + +#ifdef CONFIG_PM_SLEEP +int dsa_switch_suspend(struct dsa_switch *ds) +{ + int i, ret = 0; + + /* Suspend slave network devices */ + for (i = 0; i < ds->num_ports; i++) { + if (!dsa_is_port_initialized(ds, i)) + continue; + + ret = dsa_slave_suspend(ds->ports[i].netdev); + if (ret) + return ret; + } + + if (ds->ops->suspend) + ret = ds->ops->suspend(ds); + + return ret; +} +EXPORT_SYMBOL_GPL(dsa_switch_suspend); + +int dsa_switch_resume(struct dsa_switch *ds) +{ + int i, ret = 0; + + if (ds->ops->resume) + ret = ds->ops->resume(ds); + + if (ret) + return ret; + + /* Resume slave network devices */ + for (i = 0; i < ds->num_ports; i++) { + if (!dsa_is_port_initialized(ds, i)) + continue; + + ret = dsa_slave_resume(ds->ports[i].netdev); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(dsa_switch_resume); +#endif + +/* platform driver init and cleanup *****************************************/ +static int dev_is_class(struct device *dev, void *class) +{ + if (dev->class != NULL && !strcmp(dev->class->name, class)) + return 1; + + return 0; +} + +static struct device *dev_find_class(struct device *parent, char *class) +{ + if (dev_is_class(parent, class)) { + get_device(parent); + return parent; + } + + return device_find_child(parent, class, dev_is_class); +} + +struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "mdio_bus"); + if (d != NULL) { + struct mii_bus *bus; + + bus = to_mii_bus(d); + put_device(d); + + return bus; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); + +#ifdef CONFIG_OF +static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, + struct dsa_chip_data *cd, + int chip_index, int port_index, + struct device_node *link) +{ + const __be32 *reg; + int link_sw_addr; + struct device_node *parent_sw; + int len; + + parent_sw = of_get_parent(link); + if (!parent_sw) + return -EINVAL; + + reg = of_get_property(parent_sw, "reg", &len); + if (!reg || (len != sizeof(*reg) * 2)) + return -EINVAL; + + /* + * Get the destination switch number from the second field of its 'reg' + * property, i.e. for "reg = <0x19 1>" sw_addr is '1'. + */ + link_sw_addr = be32_to_cpup(reg + 1); + + if (link_sw_addr >= pd->nr_chips) + return -EINVAL; + + cd->rtable[link_sw_addr] = port_index; + + return 0; +} + +static int dsa_of_probe_links(struct dsa_platform_data *pd, + struct dsa_chip_data *cd, + int chip_index, int port_index, + struct device_node *port, + const char *port_name) +{ + struct device_node *link; + int link_index; + int ret; + + for (link_index = 0;; link_index++) { + link = of_parse_phandle(port, "link", link_index); + if (!link) + break; + + if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) { + ret = dsa_of_setup_routing_table(pd, cd, chip_index, + port_index, link); + if (ret) + return ret; + } + } + return 0; +} + +static void dsa_of_free_platform_data(struct dsa_platform_data *pd) +{ + int i; + int port_index; + + for (i = 0; i < pd->nr_chips; i++) { + port_index = 0; + while (port_index < DSA_MAX_PORTS) { + kfree(pd->chip[i].port_names[port_index]); + port_index++; + } + + /* Drop our reference to the MDIO bus device */ + if (pd->chip[i].host_dev) + put_device(pd->chip[i].host_dev); + } + kfree(pd->chip); +} + +static int dsa_of_probe(struct device *dev) +{ + struct device_node *np = dev->of_node; + struct device_node *child, *mdio, *ethernet, *port; + struct mii_bus *mdio_bus, *mdio_bus_switch; + struct net_device *ethernet_dev; + struct dsa_platform_data *pd; + struct dsa_chip_data *cd; + const char *port_name; + int chip_index, port_index; + const unsigned int *sw_addr, *port_reg; + u32 eeprom_len; + int ret; + + mdio = of_parse_phandle(np, "dsa,mii-bus", 0); + if (!mdio) + return -EINVAL; + + mdio_bus = of_mdio_find_bus(mdio); + if (!mdio_bus) + return -EPROBE_DEFER; + + ethernet = of_parse_phandle(np, "dsa,ethernet", 0); + if (!ethernet) { + ret = -EINVAL; + goto out_put_mdio; + } + + ethernet_dev = of_find_net_device_by_node(ethernet); + if (!ethernet_dev) { + ret = -EPROBE_DEFER; + goto out_put_mdio; + } + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) { + ret = -ENOMEM; + goto out_put_ethernet; + } + + dev->platform_data = pd; + pd->of_netdev = ethernet_dev; + pd->nr_chips = of_get_available_child_count(np); + if (pd->nr_chips > DSA_MAX_SWITCHES) + pd->nr_chips = DSA_MAX_SWITCHES; + + pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data), + GFP_KERNEL); + if (!pd->chip) { + ret = -ENOMEM; + goto out_free; + } + + chip_index = -1; + for_each_available_child_of_node(np, child) { + int i; + + chip_index++; + cd = &pd->chip[chip_index]; + + cd->of_node = child; + + /* Initialize the routing table */ + for (i = 0; i < DSA_MAX_SWITCHES; ++i) + cd->rtable[i] = DSA_RTABLE_NONE; + + /* When assigning the host device, increment its refcount */ + cd->host_dev = get_device(&mdio_bus->dev); + + sw_addr = of_get_property(child, "reg", NULL); + if (!sw_addr) + continue; + + cd->sw_addr = be32_to_cpup(sw_addr); + if (cd->sw_addr >= PHY_MAX_ADDR) + continue; + + if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) + cd->eeprom_len = eeprom_len; + + mdio = of_parse_phandle(child, "mii-bus", 0); + if (mdio) { + mdio_bus_switch = of_mdio_find_bus(mdio); + if (!mdio_bus_switch) { + ret = -EPROBE_DEFER; + goto out_free_chip; + } + + /* Drop the mdio_bus device ref, replacing the host + * device with the mdio_bus_switch device, keeping + * the refcount from of_mdio_find_bus() above. + */ + put_device(cd->host_dev); + cd->host_dev = &mdio_bus_switch->dev; + } + + for_each_available_child_of_node(child, port) { + port_reg = of_get_property(port, "reg", NULL); + if (!port_reg) + continue; + + port_index = be32_to_cpup(port_reg); + if (port_index >= DSA_MAX_PORTS) + break; + + port_name = of_get_property(port, "label", NULL); + if (!port_name) + continue; + + cd->port_dn[port_index] = port; + + cd->port_names[port_index] = kstrdup(port_name, + GFP_KERNEL); + if (!cd->port_names[port_index]) { + ret = -ENOMEM; + goto out_free_chip; + } + + ret = dsa_of_probe_links(pd, cd, chip_index, + port_index, port, port_name); + if (ret) + goto out_free_chip; + + } + } + + /* The individual chips hold their own refcount on the mdio bus, + * so drop ours */ + put_device(&mdio_bus->dev); + + return 0; + +out_free_chip: + dsa_of_free_platform_data(pd); +out_free: + kfree(pd); + dev->platform_data = NULL; +out_put_ethernet: + put_device(ðernet_dev->dev); +out_put_mdio: + put_device(&mdio_bus->dev); + return ret; +} + +static void dsa_of_remove(struct device *dev) +{ + struct dsa_platform_data *pd = dev->platform_data; + + if (!dev->of_node) + return; + + dsa_of_free_platform_data(pd); + put_device(&pd->of_netdev->dev); + kfree(pd); +} +#else +static inline int dsa_of_probe(struct device *dev) +{ + return 0; +} + +static inline void dsa_of_remove(struct device *dev) +{ +} +#endif + +static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, + struct device *parent, struct dsa_platform_data *pd) +{ + int i; + unsigned configured = 0; + + dst->pd = pd; + dst->master_netdev = dev; + dst->cpu_port = -1; + + for (i = 0; i < pd->nr_chips; i++) { + struct dsa_switch *ds; + + ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev); + if (IS_ERR(ds)) { + netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", + i, PTR_ERR(ds)); + continue; + } + + dst->ds[i] = ds; + + ++configured; + } + + /* + * If no switch was found, exit cleanly + */ + if (!configured) + return -EPROBE_DEFER; + + /* + * If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. + */ + wmb(); + dev->dsa_ptr = (void *)dst; + + return 0; +} + +static int dsa_probe(struct platform_device *pdev) +{ + struct dsa_platform_data *pd = pdev->dev.platform_data; + struct net_device *dev; + struct dsa_switch_tree *dst; + int ret; + + if (pdev->dev.of_node) { + ret = dsa_of_probe(&pdev->dev); + if (ret) + return ret; + + pd = pdev->dev.platform_data; + } + + if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL)) + return -EINVAL; + + if (pd->of_netdev) { + dev = pd->of_netdev; + dev_hold(dev); + } else { + dev = dsa_dev_to_net_device(pd->netdev); + } + if (dev == NULL) { + ret = -EPROBE_DEFER; + goto out; + } + + if (dev->dsa_ptr != NULL) { + dev_put(dev); + ret = -EEXIST; + goto out; + } + + dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL); + if (dst == NULL) { + dev_put(dev); + ret = -ENOMEM; + goto out; + } + + platform_set_drvdata(pdev, dst); + + ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); + if (ret) { + dev_put(dev); + goto out; + } + + return 0; + +out: + dsa_of_remove(&pdev->dev); + + return ret; +} + +static void dsa_remove_dst(struct dsa_switch_tree *dst) +{ + int i; + + dst->master_netdev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); + + for (i = 0; i < dst->pd->nr_chips; i++) { + struct dsa_switch *ds = dst->ds[i]; + + if (ds) + dsa_switch_destroy(ds); + } + + dsa_cpu_port_ethtool_restore(dst->cpu_switch); + + dev_put(dst->master_netdev); +} + +static int dsa_remove(struct platform_device *pdev) +{ + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); + + dsa_remove_dst(dst); + dsa_of_remove(&pdev->dev); + + return 0; +} + +static void dsa_shutdown(struct platform_device *pdev) +{ +} + +#ifdef CONFIG_PM_SLEEP +static int dsa_suspend(struct device *d) +{ + struct platform_device *pdev = to_platform_device(d); + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); + int i, ret = 0; + + for (i = 0; i < dst->pd->nr_chips; i++) { + struct dsa_switch *ds = dst->ds[i]; + + if (ds != NULL) + ret = dsa_switch_suspend(ds); + } + + return ret; +} + +static int dsa_resume(struct device *d) +{ + struct platform_device *pdev = to_platform_device(d); + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); + int i, ret = 0; + + for (i = 0; i < dst->pd->nr_chips; i++) { + struct dsa_switch *ds = dst->ds[i]; + + if (ds != NULL) + ret = dsa_switch_resume(ds); + } + + return ret; +} +#endif + +static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); + +static const struct of_device_id dsa_of_match_table[] = { + { .compatible = "marvell,dsa", }, + {} +}; +MODULE_DEVICE_TABLE(of, dsa_of_match_table); + +static struct platform_driver dsa_driver = { + .probe = dsa_probe, + .remove = dsa_remove, + .shutdown = dsa_shutdown, + .driver = { + .name = "dsa", + .of_match_table = dsa_of_match_table, + .pm = &dsa_pm_ops, + }, +}; + +int dsa_legacy_register(void) +{ + return platform_driver_register(&dsa_driver); +} + +void dsa_legacy_unregister(void) +{ + platform_driver_unregister(&dsa_driver); +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index c34872e1febc..7693182df81e 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -17,6 +17,7 @@ #include <linux/of_mdio.h> #include <linux/mdio.h> #include <linux/list.h> +#include <net/dsa.h> #include <net/rtnetlink.h> #include <net/switchdev.h> #include <net/pkt_cls.h> @@ -419,8 +420,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev, return 0; } -static int dsa_fastest_ageing_time(struct dsa_switch *ds, - unsigned int ageing_time) +static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds, + unsigned int ageing_time) { int i; @@ -443,9 +444,13 @@ static int dsa_slave_ageing_time(struct net_device *dev, unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time); unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); - /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ - if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_prepare(trans)) { + if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) + return -ERANGE; + if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) + return -ERANGE; return 0; + } /* Keep the fastest ageing time in case of multiple bridges */ p->dp->ageing_time = ageing_time; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 6456dacf9ae9..ca6e26e514f0 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -1,7 +1,8 @@ /* * Handling of a single switch chip, part of a switch fabric * - * Copyright (c) 2017 Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * Copyright (c) 2017 Savoir-faire Linux Inc. + * Vivien Didelot <vivien.didelot@savoirfairelinux.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,9 +20,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, if (ds->index == info->sw_index && ds->ops->port_bridge_join) return ds->ops->port_bridge_join(ds, info->port, info->br); - if (ds->index != info->sw_index) - dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n", - info->sw_index, info->port, netdev_name(info->br)); + if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join) + return ds->ops->crosschip_bridge_join(ds, info->sw_index, + info->port, info->br); return 0; } @@ -32,9 +33,9 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, if (ds->index == info->sw_index && ds->ops->port_bridge_leave) ds->ops->port_bridge_leave(ds, info->port, info->br); - if (ds->index != info->sw_index) - dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n", - info->sw_index, info->port, netdev_name(info->br)); + if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave) + ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port, + info->br); return 0; } diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 5d925b6b2bb1..2a9b52c5af86 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -12,6 +12,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/dsa.h> #include "dsa_priv.h" /* This tag length is 4 bytes, older ones were 6 bytes, we do not @@ -91,23 +92,17 @@ out_free: return NULL; } -static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; int source_port; u8 *brcm_tag; - if (unlikely(dst == NULL)) - goto out_drop; - ds = dst->cpu_switch; - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) goto out_drop; @@ -139,22 +134,12 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->data - ETH_HLEN - BRCM_TAG_LEN, 2 * ETH_ALEN); - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; skb->dev = ds->ports[source_port].netdev; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops brcm_netdev_ops = { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 72579ceea381..1c6633f0de01 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -11,6 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/dsa.h> #include "dsa_priv.h" #define DSA_HLEN 4 @@ -67,8 +68,9 @@ out_free: return NULL; } -static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -76,13 +78,6 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_device; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) goto out_drop; @@ -164,21 +159,11 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, } skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops dsa_netdev_ops = { diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 648c051817a1..d9c668aa5e54 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -11,6 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/dsa.h> #include "dsa_priv.h" #define DSA_HLEN 4 @@ -80,8 +81,9 @@ out_free: return NULL; } -static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -89,13 +91,6 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_device; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) goto out_drop; @@ -183,21 +178,11 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, } skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops edsa_netdev_ops = { diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c new file mode 100644 index 000000000000..837cdddb53f0 --- /dev/null +++ b/net/dsa/tag_mtk.c @@ -0,0 +1,100 @@ +/* + * Mediatek DSA Tag support + * Copyright (C) 2017 Landen Chao <landen.chao@mediatek.com> + * Sean Wang <sean.wang@mediatek.com> + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/etherdevice.h> +#include <net/dsa.h> +#include "dsa_priv.h" + +#define MTK_HDR_LEN 4 +#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) +#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) + +static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *mtk_tag; + + if (skb_cow_head(skb, MTK_HDR_LEN) < 0) + goto out_free; + + skb_push(skb, MTK_HDR_LEN); + + memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); + + /* Build the tag after the MAC Source Address */ + mtk_tag = skb->data + 2 * ETH_ALEN; + mtk_tag[0] = 0; + mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + mtk_tag[2] = 0; + mtk_tag[3] = 0; + + return skb; + +out_free: + kfree_skb(skb); + return NULL; +} + +static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + int port; + __be16 *phdr, hdr; + + if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) + goto out_drop; + + /* The MTK header is added by the switch between src addr + * and ethertype at this point, skb->data points to 2 bytes + * after src addr so header should be 2 bytes right before. + */ + phdr = (__be16 *)(skb->data - 2); + hdr = ntohs(*phdr); + + /* Remove MTK tag and recalculate checksum. */ + skb_pull_rcsum(skb, MTK_HDR_LEN); + + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - MTK_HDR_LEN, + 2 * ETH_ALEN); + + /* This protocol doesn't support cascading multiple + * switches so it's safe to assume the switch is first + * in the tree. + */ + ds = dst->ds[0]; + if (!ds) + goto out_drop; + + /* Get source port information */ + port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); + if (!ds->ports[port].netdev) + goto out_drop; + + skb->dev = ds->ports[port].netdev; + + return skb; + +out_drop: + return NULL; +} + +const struct dsa_device_ops mtk_netdev_ops = { + .xmit = mtk_tag_xmit, + .rcv = mtk_tag_rcv, +}; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 30240f343aea..3ba3f59f7a34 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -12,6 +12,7 @@ */ #include <linux/etherdevice.h> +#include <net/dsa.h> #include "dsa_priv.h" #define QCA_HDR_LEN 2 @@ -65,8 +66,9 @@ out_free: return NULL; } -static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -74,13 +76,6 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, int port; __be16 *phdr, hdr; - if (unlikely(!dst)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - goto out; - if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) goto out_drop; @@ -114,22 +109,12 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, goto out_drop; /* Update skb & forward the frame accordingly */ - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; skb->dev = ds->ports[port].netdev; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops qca_netdev_ops = { diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 26f977176978..aafc2fc74c30 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -11,6 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/dsa.h> #include "dsa_priv.h" static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) @@ -57,22 +58,17 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) return nskb; } -static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; u8 *trailer; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; ds = dst->cpu_switch; - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (skb_linearize(skb)) goto out_drop; @@ -88,21 +84,11 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, pskb_trim_rcsum(skb, skb->len - 4); skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops trailer_netdev_ops = { diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 1ab30e7d3f99..81dac16933fc 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -350,7 +350,7 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) return 0; invalid: - netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; nla_put_failure: @@ -432,7 +432,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) return 0; invalid: - netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; nla_put_failure: diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index fc60cd061f39..99f6c254ea77 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -249,8 +249,7 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, genl_family_attrbuf(&nl802154_fam), - nl802154_fam.maxattr, - nl802154_policy); + nl802154_fam.maxattr, nl802154_policy, NULL); if (err) goto out_unlock; @@ -562,8 +561,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct nl802154_dump_wpan_phy_state *state) { struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); - int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, - tb, nl802154_fam.maxattr, nl802154_policy); + int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb, + nl802154_fam.maxattr, nl802154_policy, NULL); /* TODO check if we can handle error here, * we have no backward compatibility @@ -1308,7 +1307,7 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla, struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1]; if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, - nl802154_dev_addr_policy)) + nl802154_dev_addr_policy, NULL)) return -EINVAL; if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || @@ -1348,7 +1347,7 @@ ieee802154_llsec_parse_key_id(struct nlattr *nla, struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1]; if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla, - nl802154_key_id_policy)) + nl802154_key_id_policy, NULL)) return -EINVAL; if (!attrs[NL802154_KEY_ID_ATTR_MODE]) @@ -1565,7 +1564,7 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info) if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], - nl802154_key_policy)) + nl802154_key_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] || @@ -1615,7 +1614,7 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info) if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], - nl802154_key_policy)) + nl802154_key_policy, info->extack)) return -EINVAL; if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) @@ -1729,8 +1728,8 @@ ieee802154_llsec_parse_device(struct nlattr *nla, { struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, nla, - nl802154_dev_policy)) + if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, + nla, nl802154_dev_policy, NULL)) return -EINVAL; memset(dev, 0, sizeof(*dev)); @@ -1783,7 +1782,7 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info) if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVICE], - nl802154_dev_policy)) + nl802154_dev_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]) @@ -1911,7 +1910,7 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], - nl802154_devkey_policy) < 0) + nl802154_devkey_policy, info->extack) < 0) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] || @@ -1943,7 +1942,7 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], - nl802154_devkey_policy)) + nl802154_devkey_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) @@ -2063,8 +2062,8 @@ llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl) { struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, nla, - nl802154_seclevel_policy)) + if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, + nla, nl802154_seclevel_policy, NULL)) return -EINVAL; memset(sl, 0, sizeof(*sl)); diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index c6d4238ff94a..f83de23a30e7 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_rate.o tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ - fib_frontend.o fib_semantics.o fib_trie.o \ + fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6b1fc6e4278e..d1a11707a126 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1599,8 +1599,9 @@ static const struct net_protocol igmp_protocol = { }; #endif -static const struct net_protocol tcp_protocol = { +static struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, + .early_demux_handler = tcp_v4_early_demux, .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, @@ -1608,8 +1609,9 @@ static const struct net_protocol tcp_protocol = { .icmp_strict_tag_validation = 1, }; -static const struct net_protocol udp_protocol = { +static struct net_protocol udp_protocol = { .early_demux = udp_v4_early_demux, + .early_demux_handler = udp_v4_early_demux, .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, @@ -1720,6 +1722,8 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; + net->ipv4.sysctl_udp_early_demux = 1; + net->ipv4.sysctl_tcp_early_demux = 1; #ifdef CONFIG_SYSCTL net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; #endif diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 51b27ae09fbd..0937b34c27ca 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -872,7 +872,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) skb->pkt_type != PACKET_HOST) state = NUD_STALE; neigh_update(n, sha, state, - override ? NEIGH_UPDATE_F_OVERRIDE : 0); + override ? NEIGH_UPDATE_F_OVERRIDE : 0, 0); neigh_release(n); } @@ -1033,7 +1033,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? r->arp_ha.sa_data : NULL, state, NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_ADMIN, 0); neigh_release(neigh); } return err; @@ -1084,7 +1084,7 @@ static int arp_invalidate(struct net_device *dev, __be32 ip) if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_ADMIN, 0); neigh_release(neigh); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cebedd545e5e..f33f53791f50 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -582,7 +582,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, + NULL); if (err < 0) goto errout; @@ -752,7 +753,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, struct in_device *in_dev; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, + NULL); if (err < 0) goto errout; @@ -1192,6 +1194,18 @@ out: return done; } +static __be32 in_dev_select_addr(const struct in_device *in_dev, + int scope) +{ + for_primary_ifa(in_dev) { + if (ifa->ifa_scope != RT_SCOPE_LINK && + ifa->ifa_scope <= scope) + return ifa->ifa_local; + } endfor_ifa(in_dev); + + return 0; +} + __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { __be32 addr = 0; @@ -1228,13 +1242,9 @@ no_in_dev: if (master_idx && (dev = dev_get_by_index_rcu(net, master_idx)) && (in_dev = __in_dev_get_rcu(dev))) { - for_primary_ifa(in_dev) { - if (ifa->ifa_scope != RT_SCOPE_LINK && - ifa->ifa_scope <= scope) { - addr = ifa->ifa_local; - goto out_unlock; - } - } endfor_ifa(in_dev); + addr = in_dev_select_addr(in_dev, scope); + if (addr) + goto out_unlock; } /* Not loopback addresses on loopback should be preferred @@ -1249,13 +1259,9 @@ no_in_dev: if (!in_dev) continue; - for_primary_ifa(in_dev) { - if (ifa->ifa_scope != RT_SCOPE_LINK && - ifa->ifa_scope <= scope) { - addr = ifa->ifa_local; - goto out_unlock; - } - } endfor_ifa(in_dev); + addr = in_dev_select_addr(in_dev, scope); + if (addr) + goto out_unlock; } out_unlock: rcu_read_unlock(); @@ -1713,7 +1719,7 @@ static int inet_validate_link_af(const struct net_device *dev, if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT; - err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); + err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); if (err < 0) return err; @@ -1741,7 +1747,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) if (!in_dev) return -EAFNOSUPPORT; - if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) + if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) BUG(); if (tb[IFLA_INET_CONF]) { @@ -1798,6 +1804,9 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; + if (!devconf) + goto out; + if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, IPV4_DEVCONF(*devconf, FORWARDING)) < 0) @@ -1819,6 +1828,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) goto nla_put_failure; +out: nlmsg_end(skb, nlh); return 0; @@ -1827,8 +1837,8 @@ nla_put_failure: return -EMSGSIZE; } -void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv4_devconf *devconf) +void inet_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv4_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; @@ -1838,7 +1848,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, - RTM_NEWNETCONF, 0, type); + event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -1874,7 +1884,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, int err; err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv4_policy); + devconf_ipv4_policy, NULL); if (err < 0) goto errout; @@ -2017,10 +2027,12 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); @@ -2033,7 +2045,8 @@ static void inet_forward_change(struct net *net) in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, dev->ifindex, &in_dev->cnf); } } @@ -2078,19 +2091,22 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, if (i == IPV4_DEVCONF_RP_FILTER - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_RP_FILTER, ifindex, cnf); } if (i == IPV4_DEVCONF_PROXY_ARP - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, ifindex, cnf); } if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, ifindex, cnf); } } @@ -2125,7 +2141,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, container_of(cnf, struct in_device, cnf); if (*valp) dev_disable_lro(idev->dev); - inet_netconf_notify_devconf(net, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, idev->dev->ifindex, cnf); @@ -2133,7 +2149,8 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, rtnl_unlock(); rt_cache_flush(net); } else - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); } @@ -2255,7 +2272,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, p->sysctl = t; - inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, + ifindex, p); return 0; free: @@ -2264,16 +2282,18 @@ out: return -ENOBUFS; } -static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) +static void __devinet_sysctl_unregister(struct net *net, + struct ipv4_devconf *cnf, int ifindex) { struct devinet_sysctl_table *t = cnf->sysctl; - if (!t) - return; + if (t) { + cnf->sysctl = NULL; + unregister_net_sysctl_table(t->sysctl_header); + kfree(t); + } - cnf->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); + inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int devinet_sysctl_register(struct in_device *idev) @@ -2295,7 +2315,9 @@ static int devinet_sysctl_register(struct in_device *idev) static void devinet_sysctl_unregister(struct in_device *idev) { - __devinet_sysctl_unregister(&idev->cnf); + struct net *net = dev_net(idev->dev); + + __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex); neigh_sysctl_unregister(idev->arp_parms); } @@ -2370,9 +2392,9 @@ static __net_init int devinet_init_net(struct net *net) #ifdef CONFIG_SYSCTL err_reg_ctl: - __devinet_sysctl_unregister(dflt); + __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT); err_reg_dflt: - __devinet_sysctl_unregister(all); + __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: if (tbl != ctl_forward_entry) kfree(tbl); @@ -2394,8 +2416,10 @@ static __net_exit void devinet_exit_net(struct net *net) tbl = net->ipv4.forw_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.forw_hdr); - __devinet_sysctl_unregister(net->ipv4.devconf_dflt); - __devinet_sysctl_unregister(net->ipv4.devconf_all); + __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, + NETCONFA_IFINDEX_DEFAULT); + __devinet_sysctl_unregister(net, net->ipv4.devconf_all, + NETCONFA_IFINDEX_ALL); kfree(tbl); #endif kfree(net->ipv4.devconf_dflt); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 42bfd08109dd..434dd2538716 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -632,7 +632,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, int err, remaining; struct rtmsg *rtm; - err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); + err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy, + NULL); if (err < 0) goto errout; @@ -1083,7 +1084,8 @@ static void nl_fib_input(struct sk_buff *skb) net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); - if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || + if (skb->len < nlmsg_total_size(sizeof(*frn)) || + skb->len < nlh->nlmsg_len || nlmsg_len(nlh) < sizeof(*frn)) return; diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c new file mode 100644 index 000000000000..e0714d975947 --- /dev/null +++ b/net/ipv4/fib_notifier.c @@ -0,0 +1,86 @@ +#include <linux/rtnetlink.h> +#include <linux/notifier.h> +#include <linux/rcupdate.h> +#include <linux/kernel.h> +#include <net/net_namespace.h> +#include <net/netns/ipv4.h> +#include <net/ip_fib.h> + +static ATOMIC_NOTIFIER_HEAD(fib_chain); + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return nb->notifier_call(nb, event_type, info); +} + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + net->ipv4.fib_seq++; + info->net = net; + return atomic_notifier_call_chain(&fib_chain, event_type, info); +} + +static unsigned int fib_seq_sum(void) +{ + unsigned int fib_seq = 0; + struct net *net; + + rtnl_lock(); + for_each_net(net) + fib_seq += net->ipv4.fib_seq; + rtnl_unlock(); + + return fib_seq; +} + +static bool fib_dump_is_consistent(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + unsigned int fib_seq) +{ + atomic_notifier_chain_register(&fib_chain, nb); + if (fib_seq == fib_seq_sum()) + return true; + atomic_notifier_chain_unregister(&fib_chain, nb); + if (cb) + cb(nb); + return false; +} + +#define FIB_DUMP_MAX_RETRIES 5 +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)) +{ + int retries = 0; + + do { + unsigned int fib_seq = fib_seq_sum(); + struct net *net; + + /* Mutex semantics guarantee that every change done to + * FIB tries before we read the change sequence counter + * is now visible to us. + */ + rcu_read_lock(); + for_each_net_rcu(net) { + fib_rules_notify(net, nb); + fib_notify(net, nb); + } + rcu_read_unlock(); + + if (fib_dump_is_consistent(nb, cb, fib_seq)) + return 0; + } while (++retries < FIB_DUMP_MAX_RETRIES); + + return -EBUSY; +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 2e50062f642d..778ecf977eb2 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,6 +47,27 @@ struct fib4_rule { #endif }; +static bool fib4_rule_matchall(const struct fib_rule *rule) +{ + struct fib4_rule *r = container_of(rule, struct fib4_rule, common); + + if (r->dst_len || r->src_len || r->tos) + return false; + return fib_rule_matchall(rule); +} + +bool fib4_rule_default(const struct fib_rule *rule) +{ + if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || + rule->l3mdev) + return false; + if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN && + rule->table != RT_TABLE_DEFAULT) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib4_rule_default); + int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { @@ -164,12 +185,36 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule) +{ + struct fib_rule_notifier_info info = { + .rule = rule, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + static int call_fib_rule_notifiers(struct net *net, - enum fib_event_type event_type) + enum fib_event_type event_type, + struct fib_rule *rule) +{ + struct fib_rule_notifier_info info = { + .rule = rule, + }; + + return call_fib_notifiers(net, event_type, &info.info); +} + +/* Called with rcu_read_lock() */ +void fib_rules_notify(struct net *net, struct notifier_block *nb) { - struct fib_notifier_info info; + struct fib_rules_ops *ops = net->ipv4.rules_ops; + struct fib_rule *rule; - return call_fib_notifiers(net, event_type, &info); + list_for_each_entry_rcu(rule, &ops->rules_list, list) + call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule); } static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { @@ -228,7 +273,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule); err = 0; errout: @@ -250,7 +295,7 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule); errout: return err; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 317026a39cfa..da449ddb8cc1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -57,7 +57,6 @@ static unsigned int fib_info_cnt; static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH -u32 fib_multipath_secret __read_mostly; #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh; \ @@ -576,9 +575,6 @@ static void fib_rebalance(struct fib_info *fi) atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); } endfor_nexthops(fi); - - net_get_random_once(&fib_multipath_secret, - sizeof(fib_multipath_secret)); } static inline void fib_add_weight(struct fib_info *fi, @@ -1641,7 +1637,7 @@ void fib_select_multipath(struct fib_result *res, int hash) #endif void fib_select_path(struct net *net, struct fib_result *res, - struct flowi4 *fl4, int mp_hash) + struct flowi4 *fl4, const struct sk_buff *skb) { bool oif_check; @@ -1650,10 +1646,9 @@ void fib_select_path(struct net *net, struct fib_result *res, #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi->fib_nhs > 1 && oif_check) { - if (mp_hash < 0) - mp_hash = get_hash_from_flowi4(fl4) >> 1; + int h = fib_multipath_hash(res->fi, fl4, skb); - fib_select_multipath(res, mp_hash); + fib_select_multipath(res, h); } else #endif diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2f0d8233950f..1201409ba1dc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -84,43 +84,6 @@ #include <trace/events/fib.h> #include "fib_lookup.h" -static unsigned int fib_seq_sum(void) -{ - unsigned int fib_seq = 0; - struct net *net; - - rtnl_lock(); - for_each_net(net) - fib_seq += net->ipv4.fib_seq; - rtnl_unlock(); - - return fib_seq; -} - -static ATOMIC_NOTIFIER_HEAD(fib_chain); - -static int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info) -{ - info->net = net; - return nb->notifier_call(nb, event_type, info); -} - -static void fib_rules_notify(struct net *net, struct notifier_block *nb, - enum fib_event_type event_type) -{ -#ifdef CONFIG_IP_MULTIPLE_TABLES - struct fib_notifier_info info; - - if (net->ipv4.fib_has_custom_rules) - call_fib_notifier(nb, net, event_type, &info); -#endif -} - -static void fib_notify(struct net *net, struct notifier_block *nb, - enum fib_event_type event_type); - static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, @@ -137,62 +100,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, return call_fib_notifier(nb, net, event_type, &info.info); } -static bool fib_dump_is_consistent(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb), - unsigned int fib_seq) -{ - atomic_notifier_chain_register(&fib_chain, nb); - if (fib_seq == fib_seq_sum()) - return true; - atomic_notifier_chain_unregister(&fib_chain, nb); - if (cb) - cb(nb); - return false; -} - -#define FIB_DUMP_MAX_RETRIES 5 -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)) -{ - int retries = 0; - - do { - unsigned int fib_seq = fib_seq_sum(); - struct net *net; - - /* Mutex semantics guarantee that every change done to - * FIB tries before we read the change sequence counter - * is now visible to us. - */ - rcu_read_lock(); - for_each_net_rcu(net) { - fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD); - fib_notify(net, nb, FIB_EVENT_ENTRY_ADD); - } - rcu_read_unlock(); - - if (fib_dump_is_consistent(nb, cb, fib_seq)) - return 0; - } while (++retries < FIB_DUMP_MAX_RETRIES); - - return -EBUSY; -} -EXPORT_SYMBOL(register_fib_notifier); - -int unregister_fib_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&fib_chain, nb); -} -EXPORT_SYMBOL(unregister_fib_notifier); - -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, - struct fib_notifier_info *info) -{ - net->ipv4.fib_seq++; - info->net = net; - return atomic_notifier_call_chain(&fib_chain, event_type, info); -} - static int call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, @@ -1995,8 +1902,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb) } static void fib_leaf_notify(struct net *net, struct key_vector *l, - struct fib_table *tb, struct notifier_block *nb, - enum fib_event_type event_type) + struct fib_table *tb, struct notifier_block *nb) { struct fib_alias *fa; @@ -2012,22 +1918,21 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, if (tb->tb_id != fa->tb_id) continue; - call_fib_entry_notifier(nb, net, event_type, l->key, + call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, fa->fa_type, fa->tb_id); } } static void fib_table_notify(struct net *net, struct fib_table *tb, - struct notifier_block *nb, - enum fib_event_type event_type) + struct notifier_block *nb) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *l, *tp = t->kv; t_key key = 0; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - fib_leaf_notify(net, l, tb, nb, event_type); + fib_leaf_notify(net, l, tb, nb); key = l->key + 1; /* stop in case of wrap around */ @@ -2036,8 +1941,7 @@ static void fib_table_notify(struct net *net, struct fib_table *tb, } } -static void fib_notify(struct net *net, struct notifier_block *nb, - enum fib_event_type event_type) +void fib_notify(struct net *net, struct notifier_block *nb) { unsigned int h; @@ -2046,7 +1950,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb, struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) - fib_table_notify(net, tb, nb, event_type); + fib_table_notify(net, tb, nb); } } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fc310db2708b..43318b5f5647 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -464,22 +464,6 @@ out_bh_enable: local_bh_enable(); } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - -/* Source and destination is swapped. See ip_multipath_icmp_hash */ -static int icmp_multipath_hash_skb(const struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - return fib_multipath_hash(iph->daddr, iph->saddr); -} - -#else - -#define icmp_multipath_hash_skb(skb) (-1) - -#endif - static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, @@ -505,8 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key_hash(net, fl4, - icmp_multipath_hash_skb(skb_in)); + rt = __ip_route_output_key_hash(net, fl4, skb_in); if (IS_ERR(rt)) return rt; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bbe7f72db9c1..b3cdeec85f1f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg) qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); + rcu_read_lock(); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg) __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { - struct sk_buff *head = qp->q.fragments; + struct sk_buff *clone, *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg) if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; - rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) - goto out_rcu_unlock; + goto out; + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) - goto out_rcu_unlock; + goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (frag_expire_skip_icmp(qp->user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) - goto out_rcu_unlock; + goto out; + + clone = skb_clone(head, GFP_ATOMIC); /* Send an ICMP "Fragment Reassembly Timeout" message. */ - icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); -out_rcu_unlock: - rcu_read_unlock(); + if (clone) { + spin_unlock(&qp->q.lock); + icmp_send(clone, ICMP_TIME_EXCEEDED, + ICMP_EXC_FRAGTIME, 0); + consume_skb(clone); + goto out_rcu_unlock; + } } out: spin_unlock(&qp->q.lock); +out_rcu_unlock: + rcu_read_unlock(); ipq_put(qp); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d6feabb03516..fa2dc8f692c6 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct net_device *dev = skb->dev; + void (*edemux)(struct sk_buff *skb); /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) int protocol = iph->protocol; ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && ipprot->early_demux) { - ipprot->early_demux(skb); + if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { + edemux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index a31f47ccaad9..baf196eaf1d8 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -235,7 +235,7 @@ static int ip_tun_build_state(struct nlattr *attr, struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy); + err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL); if (err < 0) return err; @@ -332,7 +332,8 @@ static int ip6_tun_build_state(struct nlattr *attr, struct nlattr *tb[LWTUNNEL_IP6_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy); + err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy, + NULL); if (err < 0) return err; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index fd9f34bbd740..c3b12b1c7162 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -57,6 +57,7 @@ #include <linux/export.h> #include <net/net_namespace.h> #include <net/arp.h> +#include <net/dsa.h> #include <net/ip.h> #include <net/ipconfig.h> #include <net/route.h> @@ -306,7 +307,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { + if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c0317c940bcd..d7be21f2174a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -631,7 +631,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; - inet_netconf_notify_devconf(dev_net(dev), + inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); @@ -820,8 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRNOTAVAIL; } IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex, - &in_dev->cnf); + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, + dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ @@ -1282,7 +1282,8 @@ static void mrtsock_destruct(struct sock *sk) ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); @@ -1344,7 +1345,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (ret == 0) { rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } @@ -2437,7 +2439,8 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, struct rtmsg *rtm; int ret, rem; - ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy); + ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, + NULL); if (ret < 0) goto out; rtm = nlmsg_data(nlh); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6241a81fd7f5..f17dab1dee6e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -562,8 +562,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } - if (ret != 0) - goto out_free; ret = -EINVAL; if (i != repl->num_entries) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 52f26459efc3..038f293c2376 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -22,6 +22,7 @@ #include <linux/icmp.h> #include <linux/if_arp.h> #include <linux/seq_file.h> +#include <linux/refcount.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> @@ -40,8 +41,8 @@ MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); struct clusterip_config { struct list_head list; /* list of all configs */ - atomic_t refcount; /* reference count */ - atomic_t entries; /* number of entries/rules + refcount_t refcount; /* reference count */ + refcount_t entries; /* number of entries/rules * referencing us */ __be32 clusterip; /* the IP address */ @@ -77,7 +78,7 @@ struct clusterip_net { static inline void clusterip_config_get(struct clusterip_config *c) { - atomic_inc(&c->refcount); + refcount_inc(&c->refcount); } @@ -89,7 +90,7 @@ static void clusterip_config_rcu_free(struct rcu_head *head) static inline void clusterip_config_put(struct clusterip_config *c) { - if (atomic_dec_and_test(&c->refcount)) + if (refcount_dec_and_test(&c->refcount)) call_rcu_bh(&c->rcu, clusterip_config_rcu_free); } @@ -103,7 +104,7 @@ clusterip_config_entry_put(struct clusterip_config *c) struct clusterip_net *cn = net_generic(net, clusterip_net_id); local_bh_disable(); - if (atomic_dec_and_lock(&c->entries, &cn->lock)) { + if (refcount_dec_and_lock(&c->entries, &cn->lock)) { list_del_rcu(&c->list); spin_unlock(&cn->lock); local_bh_enable(); @@ -149,10 +150,10 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) c = NULL; else #endif - if (unlikely(!atomic_inc_not_zero(&c->refcount))) + if (unlikely(!refcount_inc_not_zero(&c->refcount))) c = NULL; else if (entry) - atomic_inc(&c->entries); + refcount_inc(&c->entries); } rcu_read_unlock_bh(); @@ -188,8 +189,8 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; - atomic_set(&c->refcount, 1); - atomic_set(&c->entries, 1); + refcount_set(&c->refcount, 1); + refcount_set(&c->entries, 1); spin_lock_bh(&cn->lock); if (__clusterip_config_find(net, ip)) { @@ -461,7 +462,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) clusterip_config_put(cipinfo->config); - nf_ct_netns_get(par->net, par->family); + nf_ct_netns_put(par->net, par->family); } #ifdef CONFIG_COMPAT diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index bc1486f2c064..2e14ed11a35c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -165,6 +165,10 @@ static unsigned int ipv4_conntrack_local(void *priv, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + + if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ + return NF_ACCEPT; + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index f8aad03d674b..6f5e8d01b876 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); - /* We never see fragments: conntrack defrags on pre-routing - * and local-out, and nf_nat_out protects post-routing. - */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would * have dropped it. Hence it's the user's responsibilty to diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index c9b52c361da2..da04b9c33ef3 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -998,18 +998,6 @@ err_id_free: * *****************************************************************************/ -static void hex_dump(const unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; i++) { - if (i && !(i % 16)) - printk("\n"); - printk("%02x ", *(buf + i)); - } - printk("\n"); -} - /* * Parse and mangle SNMP message according to mapping. * (And this is the fucking 'basic' method). @@ -1026,7 +1014,8 @@ static int snmp_parse_mangle(unsigned char *msg, struct snmp_object *obj; if (debug > 1) - hex_dump(msg, len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1, + msg, len, 0); asn1_open(&ctx, msg, len); @@ -1260,16 +1249,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .me = THIS_MODULE, .help = help, @@ -1288,22 +1267,16 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { static int __init nf_nat_snmp_basic_init(void) { - int ret = 0; - BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - ret = nf_conntrack_helper_register(&snmp_trap_helper); - if (ret < 0) { - nf_conntrack_helper_unregister(&snmp_helper); - return ret; - } - return ret; + return nf_conntrack_helper_register(&snmp_trap_helper); } static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 146d86105183..7cd8d0d918f8 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -104,7 +104,6 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; - const struct iphdr *oiph; struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _oth; @@ -116,8 +115,6 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) return; - oiph = ip_hdr(oldskb); - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + LL_MAX_HEADER, GFP_ATOMIC); if (!nskb) diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 2981291910dd..f4e4462cb5bb 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -90,7 +90,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, nft_in(pkt)->ifindex); return; } @@ -99,7 +99,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, get_ifindex(pkt->skb->dev)); return; } diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index a0ea8aad1bf1..f18677277119 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), &range, nft_out(pkt)); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 1650ed23c15d..5120be1d3118 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { - mr.range[0].min.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - mr.range[0].max.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + mr.range[0].min.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + mr.range[0].max.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2af6244b83e2..ccfbce13a633 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -156,17 +156,18 @@ int ping_hash(struct sock *sk) void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + write_lock_bh(&ping_table.lock); if (sk_hashed(sk)) { - write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&ping_table.lock); } + write_unlock_bh(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 69cf49e8356d..4ccbf464d1ac 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED), SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED), SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), - SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED), SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS), diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 4b7c0ec65251..32a691b7ce2c 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -28,7 +28,7 @@ #include <linux/spinlock.h> #include <net/protocol.h> -const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; +struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; EXPORT_SYMBOL(inet_offloads); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8471dd116771..7a4f2c38c3c4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1250,15 +1250,11 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { - unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); + unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); + unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size, + ip_rt_min_advmss); - if (advmss == 0) { - advmss = max_t(unsigned int, dst->dev->mtu - 40, - ip_rt_min_advmss); - if (advmss > 65535 - 40) - advmss = 65535 - 40; - } - return advmss; + return min(advmss, IPV4_MAX_PMTU - header_size); } static unsigned int ipv4_mtu(const struct dst_entry *dst) @@ -1734,45 +1730,97 @@ out: } #ifdef CONFIG_IP_ROUTE_MULTIPATH - /* To make ICMP packets follow the right flow, the multipath hash is - * calculated from the inner IP addresses in reverse order. + * calculated from the inner IP addresses. */ -static int ip_multipath_icmp_hash(struct sk_buff *skb) +static void ip_multipath_l3_keys(const struct sk_buff *skb, + struct flow_keys *hash_keys) { const struct iphdr *outer_iph = ip_hdr(skb); - struct icmphdr _icmph; + const struct iphdr *inner_iph; const struct icmphdr *icmph; struct iphdr _inner_iph; - const struct iphdr *inner_iph; + struct icmphdr _icmph; + + hash_keys->addrs.v4addrs.src = outer_iph->saddr; + hash_keys->addrs.v4addrs.dst = outer_iph->daddr; + if (likely(outer_iph->protocol != IPPROTO_ICMP)) + return; if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) - goto standard_hash; + return; icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), &_icmph); if (!icmph) - goto standard_hash; + return; if (icmph->type != ICMP_DEST_UNREACH && icmph->type != ICMP_REDIRECT && icmph->type != ICMP_TIME_EXCEEDED && - icmph->type != ICMP_PARAMETERPROB) { - goto standard_hash; - } + icmph->type != ICMP_PARAMETERPROB) + return; inner_iph = skb_header_pointer(skb, outer_iph->ihl * 4 + sizeof(_icmph), sizeof(_inner_iph), &_inner_iph); if (!inner_iph) - goto standard_hash; + return; + hash_keys->addrs.v4addrs.src = inner_iph->saddr; + hash_keys->addrs.v4addrs.dst = inner_iph->daddr; +} - return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); +/* if skb is set it will be used and fl4 can be NULL */ +int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, + const struct sk_buff *skb) +{ + struct net *net = fi->fib_net; + struct flow_keys hash_keys; + u32 mhash; -standard_hash: - return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); -} + switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + case 0: + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (skb) { + ip_multipath_l3_keys(skb, &hash_keys); + } else { + hash_keys.addrs.v4addrs.src = fl4->saddr; + hash_keys.addrs.v4addrs.dst = fl4->daddr; + } + break; + case 1: + /* skb is currently provided only when forwarding */ + if (skb) { + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + struct flow_keys keys; + + /* short-circuit if we already have L4 hash present */ + if (skb->l4_hash) + return skb_get_hash_raw(skb) >> 1; + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, flag); + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + hash_keys.ports.src = keys.ports.src; + hash_keys.ports.dst = keys.ports.dst; + hash_keys.basic.ip_proto = keys.basic.ip_proto; + } else { + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + hash_keys.addrs.v4addrs.src = fl4->saddr; + hash_keys.addrs.v4addrs.dst = fl4->daddr; + hash_keys.ports.src = fl4->fl4_sport; + hash_keys.ports.dst = fl4->fl4_dport; + hash_keys.basic.ip_proto = fl4->flowi4_proto; + } + break; + } + mhash = flow_hash_from_keys(&hash_keys); + return mhash >> 1; +} +EXPORT_SYMBOL_GPL(fib_multipath_hash); #endif /* CONFIG_IP_ROUTE_MULTIPATH */ static int ip_mkroute_input(struct sk_buff *skb, @@ -1782,12 +1830,8 @@ static int ip_mkroute_input(struct sk_buff *skb, { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) { - int h; + int h = fib_multipath_hash(res->fi, NULL, skb); - if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP)) - h = ip_multipath_icmp_hash(skb); - else - h = fib_multipath_hash(saddr, daddr); fib_select_multipath(res, h); } #endif @@ -2203,7 +2247,7 @@ add: */ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, - int mp_hash) + const struct sk_buff *skb) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2365,7 +2409,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, goto make_route; } - fib_select_path(net, &res, fl4, mp_hash); + fib_select_path(net, &res, fl4, skb); dev_out = FIB_RES_DEV(res); fl4->flowi4_oif = dev_out->ifindex; @@ -2601,7 +2645,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) u32 table_id = RT_TABLE_MAIN; kuid_t uid; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, + NULL); if (err < 0) goto errout; @@ -2619,10 +2664,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) skb_reset_mac_header(skb); skb_reset_network_header(skb); - /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ - ip_hdr(skb)->protocol = IPPROTO_ICMP; - skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; @@ -2632,6 +2673,15 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) else uid = (iif ? INVALID_UID : current_uid()); + /* Bugfix: need to give ip_route_input enough of an IP header to + * not gag. + */ + ip_hdr(skb)->protocol = IPPROTO_UDP; + ip_hdr(skb)->saddr = src; + ip_hdr(skb)->daddr = dst; + + skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; fl4.saddr = src; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d6880a6149ee..6fb25693c00b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -24,6 +24,7 @@ #include <net/cipso_ipv4.h> #include <net/inet_frag.h> #include <net/ping.h> +#include <net/protocol.h> static int zero; static int one = 1; @@ -294,6 +295,58 @@ bad_key: return ret; } +static void proc_configure_early_demux(int enabled, int protocol) +{ + struct net_protocol *ipprot; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_protocol *ip6prot; +#endif + + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot) + ipprot->early_demux = enabled ? ipprot->early_demux_handler : + NULL; + +#if IS_ENABLED(CONFIG_IPV6) + ip6prot = rcu_dereference(inet6_protos[protocol]); + if (ip6prot) + ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : + NULL; +#endif +} + +static int proc_tcp_early_demux(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = 0; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (write && !ret) { + int enabled = init_net.ipv4.sysctl_tcp_early_demux; + + proc_configure_early_demux(enabled, IPPROTO_TCP); + } + + return ret; +} + +static int proc_udp_early_demux(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = 0; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (write && !ret) { + int enabled = init_net.ipv4.sysctl_udp_early_demux; + + proc_configure_early_demux(enabled, IPPROTO_UDP); + } + + return ret; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -750,6 +803,20 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "udp_early_demux", + .data = &init_net.ipv4.sysctl_udp_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_udp_early_demux + }, + { + .procname = "tcp_early_demux", + .data = &init_net.ipv4.sysctl_tcp_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tcp_early_demux + }, + { .procname = "ip_default_ttl", .data = &init_net.ipv4.sysctl_ip_default_ttl, .maxlen = sizeof(int), @@ -981,13 +1048,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_tw_recycle", - .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_max_syn_backlog", .data = &init_net.ipv4.sysctl_max_syn_backlog, .maxlen = sizeof(int), @@ -1004,6 +1064,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "fib_multipath_hash_policy", + .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, #endif { .procname = "ip_unprivileged_port_start", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf4555581282..04843ae77b9e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2322,6 +2322,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + tcp_saved_syn_free(tp); /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); @@ -2393,7 +2394,7 @@ static int tcp_repair_options_est(struct tcp_sock *tp, u16 snd_wscale = opt.opt_val & 0xFFFF; u16 rcv_wscale = opt.opt_val >> 16; - if (snd_wscale > 14 || rcv_wscale > 14) + if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE) return -EFBIG; tp->rx_opt.snd_wscale = snd_wscale; @@ -2470,7 +2471,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { + if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) { err = -EINVAL; break; } @@ -2770,7 +2771,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now = tcp_time_stamp, intv; + u32 now, intv; u64 rate64; bool slow; u32 rate; @@ -2839,6 +2840,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; + now = tcp_time_stamp; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 39c393cc0fd3..a5838858c362 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ -static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) +static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb, + unsigned int len) { static bool __once __read_mostly; @@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); - pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", - dev ? dev->name : "Unknown driver"); + if (!dev || len >= dev->mtu) + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? dev->name : "Unknown driver"); rcu_read_unlock(); } } @@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); - if (unlikely(icsk->icsk_ack.rcv_mss != len)) - tcp_gro_dev_warn(sk, skb); + /* Account for possibly-removed options */ + if (unlikely(len > icsk->icsk_ack.rcv_mss + + MAX_TCP_OPTION_SPACE)) + tcp_gro_dev_warn(sk, skb, len); } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. @@ -874,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric, const int ts) { struct tcp_sock *tp = tcp_sk(sk); - if (metric > tp->reordering) { - int mib_idx; + int mib_idx; + if (metric > tp->reordering) { tp->reordering = min(sysctl_tcp_max_reordering, metric); - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -902,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } tp->rack.reord = 1; + + /* This exciting event is worth to be remembered. 8) */ + if (ts) + mib_idx = LINUX_MIB_TCPTSREORDER; + else if (tcp_is_reno(tp)) + mib_idx = LINUX_MIB_TCPRENOREORDER; + else if (tcp_is_fack(tp)) + mib_idx = LINUX_MIB_TCPFACKREORDER; + else + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS(sock_net(sk), mib_idx); } /* This must be called before lost_out is incremented */ @@ -1930,6 +1935,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sk_buff *skb; + bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ bool mark_lost; @@ -1989,15 +1995,18 @@ void tcp_enter_loss(struct sock *sk) tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); - /* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO - * if a previous recovery is underway, otherwise it may incorrectly - * call a timeout spurious if some previously retransmitted packets - * are s/acked (sec 3.2). We do not apply that retriction since - * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS - * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO - * on PTMU discovery to avoid sending new data. + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing + * + * In theory F-RTO can be used repeatedly during loss recovery. + * In practice this interacts badly with broken middle-boxes that + * falsely raise the receive window, which results in repeated + * timeouts and stop-and-go behavior. */ - tp->frto = sysctl_tcp_frto && !inet_csk(sk)->icsk_mtup.probe_size; + tp->frto = sysctl_tcp_frto && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -3759,11 +3768,12 @@ void tcp_parse_options(const struct sk_buff *skb, !estab && sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; - if (snd_wscale > 14) { - net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n", + if (snd_wscale > TCP_MAX_WSCALE) { + net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n", __func__, - snd_wscale); - snd_wscale = 14; + snd_wscale, + TCP_MAX_WSCALE); + snd_wscale = TCP_MAX_WSCALE; } opt_rx->snd_wscale = snd_wscale; } @@ -5541,6 +5551,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); + icsk->icsk_ack.lrcvtime = tcp_time_stamp; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5759,7 +5770,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); @@ -6324,36 +6334,14 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_free; if (isn && tmp_opt.tstamp_ok) - af_ops->init_seq(skb, &tcp_rsk(req)->ts_off); + af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); if (!want_cookie && !isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (net->ipv4.tcp_death_row.sysctl_tw_recycle) { - bool strict; - - dst = af_ops->route_req(sk, &fl, req, &strict); - - if (dst && strict && - !tcp_peer_is_proven(req, dst, true, - tmp_opt.saw_tstamp)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } /* Kill the following clause, if you dislike this way. */ - else if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false, - tmp_opt.saw_tstamp)) { + if (!net->ipv4.sysctl_tcp_syncookies && + (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (net->ipv4.sysctl_max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. @@ -6366,10 +6354,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off); + isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); } if (!dst) { - dst = af_ops->route_req(sk, &fl, req, NULL); + dst = af_ops->route_req(sk, &fl, req); if (!dst) goto drop_and_free; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 575e19dcc017..20cbd2f07f28 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -94,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff) { - return secure_tcp_sequence_number(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source, tsoff); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -198,10 +198,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->write_seq = 0; } - if (tcp_death_row->sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) - tcp_fetch_timewait_stamp(sk, &rt->dst); - inet->inet_dport = usin->sin_port; sk_daddr_set(sk, daddr); @@ -236,11 +232,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = NULL; if (likely(!tp->repair)) { - seq = secure_tcp_sequence_number(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port, - &tp->tsoffset); + seq = secure_tcp_seq_and_tsoff(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port, + &tp->tsoffset); if (!tp->write_seq) tp->write_seq = seq; } @@ -1217,19 +1213,9 @@ static void tcp_v4_init_req(struct request_sock *req, static struct dst_entry *tcp_v4_route_req(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict) + const struct request_sock *req) { - struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); - - if (strict) { - if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) - *strict = true; - else - *strict = false; - } - - return dst; + return inet_csk_route_req(sk, &fl->u.ip4, req); } struct request_sock_ops tcp_request_sock_ops __read_mostly = { @@ -1253,7 +1239,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq = tcp_v4_init_sequence, + .init_seq_tsoff = tcp_v4_init_seq_and_tsoff, .send_synack = tcp_v4_send_synack, }; @@ -1423,8 +1409,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (!nsk) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -2466,7 +2450,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tw_reuse = 0; cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0f46e5fe31ad..9d0d4f39e42b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -45,8 +45,6 @@ struct tcp_metrics_block { struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; - u32 tcpm_ts; - u32 tcpm_ts_stamp; u32 tcpm_lock; u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1]; struct tcp_fastopen_metrics tcpm_fastopen; @@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); - tm->tcpm_ts = 0; - tm->tcpm_ts_stamp = 0; if (fastopen_clear) { tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; @@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return tm; } -static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) -{ - struct tcp_metrics_block *tm; - struct inetpeer_addr saddr, daddr; - unsigned int hash; - struct net *net; - - if (tw->tw_family == AF_INET) { - inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr); - inetpeer_set_addr_v4(&daddr, tw->tw_daddr); - hash = ipv4_addr_hash(tw->tw_daddr); - } -#if IS_ENABLED(CONFIG_IPV6) - else if (tw->tw_family == AF_INET6) { - if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) { - inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr); - inetpeer_set_addr_v4(&daddr, tw->tw_daddr); - hash = ipv4_addr_hash(tw->tw_daddr); - } else { - inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr); - inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr); - hash = ipv6_addr_hash(&tw->tw_v6_daddr); - } - } -#endif - else - return NULL; - - net = twsk_net(tw); - hash ^= net_hash_mix(net); - hash = hash_32(hash, tcp_metrics_hash_log); - - for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; - tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr) && - net_eq(tm_net(tm), net)) - break; - } - return tm; -} - static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, struct dst_entry *dst, bool create) @@ -573,8 +527,7 @@ reset: tp->snd_cwnd_stamp = tcp_time_stamp; } -bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, - bool paws_check, bool timestamps) +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) { struct tcp_metrics_block *tm; bool ret; @@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, rcu_read_lock(); tm = __tcp_get_metrics_req(req, dst); - if (paws_check) { - if (tm && - (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL && - ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW || - !timestamps)) - ret = false; - else - ret = true; - } else { - if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp) - ret = true; - else - ret = false; - } - rcu_read_unlock(); - - return ret; -} - -void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst) -{ - struct tcp_metrics_block *tm; - - rcu_read_lock(); - tm = tcp_get_metrics(sk, dst, true); - if (tm) { - struct tcp_sock *tp = tcp_sk(sk); - - if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp; - tp->rx_opt.ts_recent = tm->tcpm_ts; - } - } - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp); - -/* VJ's idea. Save last timestamp seen from this destination and hold - * it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter - * synchronized state. - */ -bool tcp_remember_stamp(struct sock *sk) -{ - struct dst_entry *dst = __sk_dst_get(sk); - bool ret = false; - - if (dst) { - struct tcp_metrics_block *tm; - - rcu_read_lock(); - tm = tcp_get_metrics(sk, dst, true); - if (tm) { - struct tcp_sock *tp = tcp_sk(sk); - - if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 || - ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && - tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { - tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; - tm->tcpm_ts = tp->rx_opt.ts_recent; - } - ret = true; - } - rcu_read_unlock(); - } - return ret; -} - -bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) -{ - struct tcp_metrics_block *tm; - bool ret = false; - - rcu_read_lock(); - tm = __tcp_get_metrics_tw(tw); - if (tm) { - const struct tcp_timewait_sock *tcptw; - struct sock *sk = (struct sock *) tw; - - tcptw = tcp_twsk(sk); - if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 || - ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && - tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { - tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; - tm->tcpm_ts = tcptw->tw_ts_recent; - } + if (tm && tcp_metric_get(tm, TCP_METRIC_RTT)) ret = true; - } + else + ret = false; rcu_read_unlock(); return ret; @@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, jiffies - tm->tcpm_stamp, TCP_METRICS_ATTR_PAD) < 0) goto nla_put_failure; - if (tm->tcpm_ts_stamp) { - if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, - (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0) - goto nla_put_failure; - if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL, - tm->tcpm_ts) < 0) - goto nla_put_failure; - } { int n = 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7e16243cdb58..8f6373b0cd77 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -26,6 +26,7 @@ #include <net/tcp.h> #include <net/inet_common.h> #include <net/xfrm.h> +#include <net/busy_poll.h> int sysctl_tcp_abort_on_overflow __read_mostly; @@ -94,7 +95,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_options_received tmp_opt; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; - struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { @@ -149,12 +149,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } - if (tcp_death_row->sysctl_tw_recycle && - tcptw->tw_ts_recent_stamp && - tcp_tw_remember_stamp(tw)) - inet_twsk_reschedule(tw, tw->tw_timeout); - else - inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -259,12 +254,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct inet_timewait_sock *tw; - bool recycle_ok = false; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; - if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = tcp_remember_stamp(sk); - tw = inet_twsk_alloc(sk, tcp_death_row, state); if (tw) { @@ -317,13 +308,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (timeo < rto) timeo = rto; - if (recycle_ok) { - tw->tw_timeout = rto; - } else { - tw->tw_timeout = TCP_TIMEWAIT_LEN; - if (state == TCP_TIME_WAIT) - timeo = TCP_TIMEWAIT_LEN; - } + tw->tw_timeout = TCP_TIMEWAIT_LEN; + if (state == TCP_TIME_WAIT) + timeo = TCP_TIMEWAIT_LEN; inet_twsk_schedule(tw, timeo); /* Linkage updates. */ @@ -460,6 +447,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_ack.lrcvtime = tcp_time_stamp; newtp->packets_out = 0; newtp->retrans_out = 0; @@ -812,6 +800,9 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; + /* record NAPI ID of child */ + sk_mark_napi_id(child, skb); + tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 22548b5f05cb..ffc9274b2706 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -212,12 +212,12 @@ void tcp_select_initial_window(int __space, __u32 mss, /* If no clamp set the clamp to the max possible scaled window */ if (*window_clamp == 0) - (*window_clamp) = (65535 << 14); + (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE); space = min(*window_clamp, space); /* Quantize space offering to a multiple of mss if possible. */ if (space > mss) - space = (space / mss) * mss; + space = rounddown(space, mss); /* NOTE: offering an initial window larger than 32767 * will break some buggy TCP stacks. If the admin tells us @@ -234,13 +234,11 @@ void tcp_select_initial_window(int __space, __u32 mss, (*rcv_wscale) = 0; if (wscale_ok) { - /* Set window scaling on max possible window - * See RFC1323 for an explanation of the limit to 14 - */ + /* Set window scaling on max possible window */ space = max_t(u32, space, sysctl_tcp_rmem[2]); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); - while (space > 65535 && (*rcv_wscale) < 14) { + while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { space >>= 1; (*rcv_wscale)++; } @@ -253,7 +251,7 @@ void tcp_select_initial_window(int __space, __u32 mss, } /* Set the clamp no higher than max representable value */ - (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); + (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp); } EXPORT_SYMBOL(tcp_select_initial_window); @@ -2561,7 +2559,6 @@ u32 __tcp_select_window(struct sock *sk) /* Don't do rounding if we are using window scaling, since the * scaled window will not line up with the MSS boundary anyway. */ - window = tp->rcv_wnd; if (tp->rx_opt.rcv_wscale) { window = free_space; @@ -2569,10 +2566,9 @@ u32 __tcp_select_window(struct sock *sk) * Import case: prevent zero window announcement if * 1<<rcv_wscale > mss. */ - if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window) - window = (((window >> tp->rx_opt.rcv_wscale) + 1) - << tp->rx_opt.rcv_wscale); + window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale)); } else { + window = tp->rcv_wnd; /* Get the largest window that is a nice multiple of mss. * Window clamp already applied above. * If our current window offering is within 1 mss of the @@ -2582,7 +2578,7 @@ u32 __tcp_select_window(struct sock *sk) * is too small. */ if (window <= free_space - mss || window > free_space) - window = (free_space / mss) * mss; + window = rounddown(free_space, mss); else if (mss == full_space && free_space > window + (full_space >> 1)) window = free_space; @@ -2999,6 +2995,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) { struct sk_buff *skb; + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); + /* NOTE: No TCP options attached and we never retransmit this. */ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { @@ -3014,8 +3012,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* Send it off. */ if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); - - TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); } /* Send a crossed SYN-ACK during socket establishment. diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 4ecb38ae8504..d8acbd9f477a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) /* Account for retransmits that are lost again */ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, + tcp_skb_pcount(skb)); } } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index fed66dc0e0f5..9775453b8d17 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -265,8 +265,8 @@ static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr, if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { info->vegas.tcpv_enabled = 1; info->vegas.tcpv_rttcnt = 0; - info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt), - info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), + info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt); + info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min); *attr = INET_DIAG_VEGASINFO; return sizeof(struct tcpvegas_info); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e2afe677a9d9..48c452959d2c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -307,6 +307,7 @@ config IPV6_SEG6_LWTUNNEL bool "IPv6: Segment Routing Header encapsulation support" depends on IPV6 select LWTUNNEL + select DST_CACHE ---help--- Support for encapsulation of packets within an outer IPv6 header and a Segment Routing Header using the lightweight diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 363172527e43..d6da0fe5acca 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -224,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -245,6 +246,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { #endif .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, + .disable_policy = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -276,6 +278,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -297,6 +300,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { #endif .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, + .disable_policy = 0, }; /* Check if a valid qdisc is available */ @@ -545,6 +549,9 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; + if (!devconf) + goto out; + if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; @@ -563,6 +570,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, devconf->ignore_routes_with_linkdown) < 0) goto nla_put_failure; +out: nlmsg_end(skb, nlh); return 0; @@ -571,8 +579,8 @@ nla_put_failure: return -EMSGSIZE; } -void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv6_devconf *devconf) +void inet6_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv6_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; @@ -582,7 +590,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, - RTM_NEWNETCONF, 0, type); + event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -616,7 +624,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, int err; err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv6_policy); + devconf_ipv6_policy, NULL); if (err < 0) goto errout; @@ -765,7 +773,8 @@ static void dev_forward_change(struct inet6_dev *idev) else addrconf_leave_anycast(ifa); } - inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, + NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); } @@ -800,7 +809,8 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_dflt->forwarding) { if ((!newf) ^ (!old)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); rtnl_unlock(); @@ -812,13 +822,15 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) net->ipv6.devconf_dflt->forwarding = newf; if ((!newf) ^ (!old_dflt)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); addrconf_forward_change(net, newf); if ((!newf) ^ (!old)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) @@ -843,6 +855,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) idev->cnf.ignore_routes_with_linkdown = newf; if (changed) inet6_netconf_notify_devconf(dev_net(dev), + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, dev->ifindex, &idev->cnf); @@ -865,6 +878,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); @@ -877,6 +891,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) addrconf_linkdown_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); @@ -944,6 +959,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, int scope, u32 flags, u32 valid_lft, u32 prefered_lft) { + struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; unsigned int hash; @@ -990,6 +1006,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } + if (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy) + rt->dst.flags |= DST_NOPOLICY; + neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *addr; @@ -3626,14 +3646,19 @@ restart: INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { struct rt6_info *rt = NULL; + bool keep; addrconf_del_dad_work(ifa); + keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && + !addr_is_local(&ifa->addr); + if (!keep) + list_move(&ifa->if_list, &del_list); + write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); - if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && - !addr_is_local(&ifa->addr)) { + if (keep) { /* set state to skip the notifier below */ state = INET6_IFADDR_STATE_DEAD; ifa->state = 0; @@ -3645,8 +3670,6 @@ restart: } else { state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; - - list_move(&ifa->if_list, &del_list); } spin_unlock_bh(&ifa->lock); @@ -4388,7 +4411,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ifa_flags; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, + NULL); if (err < 0) return err; @@ -4500,7 +4524,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ifa_flags; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, + NULL); if (err < 0) return err; @@ -4861,7 +4886,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct sk_buff *skb; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, + NULL); if (err < 0) goto errout; @@ -4972,6 +4998,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_PROBE_INTERVAL] = jiffies_to_msecs(cnf->rtr_probe_interval); #ifdef CONFIG_IPV6_ROUTE_INFO + array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen; array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif @@ -5003,6 +5030,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #endif array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; + array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; } static inline size_t inet6_ifla6_size(void) @@ -5229,7 +5257,8 @@ static int inet6_validate_link_af(const struct net_device *dev, if (dev && !__in6_dev_get(dev)) return -EAFNOSUPPORT; - return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy); + return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy, + NULL); } static int check_addr_gen_mode(int mode) @@ -5261,7 +5290,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (!idev) return -EAFNOSUPPORT; - if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) + if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) BUG(); if (tb[IFLA_INET6_TOKEN]) { @@ -5664,17 +5693,20 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return restart_syscall(); if (valp == &net->ipv6.devconf_dflt->proxy_ndp) - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); else if (valp == &net->ipv6.devconf_all->proxy_ndp) - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); else { struct inet6_dev *idev = ctl->extra1; - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, idev->dev->ifindex, &idev->cnf); } @@ -5827,6 +5859,105 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static +void addrconf_set_nopolicy(struct rt6_info *rt, int action) +{ + if (rt) { + if (action) + rt->dst.flags |= DST_NOPOLICY; + else + rt->dst.flags &= ~DST_NOPOLICY; + } +} + +static +void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) +{ + struct inet6_ifaddr *ifa; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + spin_lock(&ifa->lock); + if (ifa->rt) { + struct rt6_info *rt = ifa->rt; + struct fib6_table *table = rt->rt6i_table; + int cpu; + + read_lock(&table->tb6_lock); + addrconf_set_nopolicy(ifa->rt, val); + if (rt->rt6i_pcpu) { + for_each_possible_cpu(cpu) { + struct rt6_info **rtp; + + rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu); + addrconf_set_nopolicy(*rtp, val); + } + } + read_unlock(&table->tb6_lock); + } + spin_unlock(&ifa->lock); + } + read_unlock_bh(&idev->lock); +} + +static +int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) +{ + struct inet6_dev *idev; + struct net *net; + + if (!rtnl_trylock()) + return restart_syscall(); + + *valp = val; + + net = (struct net *)ctl->extra2; + if (valp == &net->ipv6.devconf_dflt->disable_policy) { + rtnl_unlock(); + return 0; + } + + if (valp == &net->ipv6.devconf_all->disable_policy) { + struct net_device *dev; + + for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev) + addrconf_disable_policy_idev(idev, val); + } + } else { + idev = (struct inet6_dev *)ctl->extra1; + addrconf_disable_policy_idev(idev, val); + } + + rtnl_unlock(); + return 0; +} + +static +int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int *valp = ctl->data; + int val = *valp; + loff_t pos = *ppos; + struct ctl_table lctl; + int ret; + + lctl = *ctl; + lctl.data = &val; + ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); + + if (write && (*valp != val)) + ret = addrconf_disable_policy(ctl, valp, val); + + if (ret) + *ppos = pos; + + return ret; +} + static int minus_one = -1; static const int one = 1; static const int two_five_five = 255; @@ -6015,6 +6146,13 @@ static const struct ctl_table addrconf_sysctl[] = { }, #ifdef CONFIG_IPV6_ROUTE_INFO { + .procname = "accept_ra_rt_info_min_plen", + .data = &ipv6_devconf.accept_ra_rt_info_min_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "accept_ra_rt_info_max_plen", .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), @@ -6185,6 +6323,13 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = addrconf_sysctl_addr_gen_mode, }, { + .procname = "disable_policy", + .data = &ipv6_devconf.disable_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable_policy, + }, + { /* sentinel */ } }; @@ -6224,7 +6369,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, ifindex = NETCONFA_IFINDEX_DEFAULT; else ifindex = idev->dev->ifindex; - inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, + ifindex, p); return 0; free: @@ -6233,7 +6379,8 @@ out: return -ENOBUFS; } -static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) +static void __addrconf_sysctl_unregister(struct net *net, + struct ipv6_devconf *p, int ifindex) { struct ctl_table *table; @@ -6244,6 +6391,8 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) unregister_net_sysctl_table(p->sysctl_header); p->sysctl_header = NULL; kfree(table); + + inet6_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int addrconf_sysctl_register(struct inet6_dev *idev) @@ -6267,7 +6416,8 @@ static int addrconf_sysctl_register(struct inet6_dev *idev) static void addrconf_sysctl_unregister(struct inet6_dev *idev) { - __addrconf_sysctl_unregister(&idev->cnf); + __addrconf_sysctl_unregister(dev_net(idev->dev), &idev->cnf, + idev->dev->ifindex); neigh_sysctl_unregister(idev->nd_parms); } @@ -6310,7 +6460,7 @@ static int __net_init addrconf_init_net(struct net *net) #ifdef CONFIG_SYSCTL err_reg_dflt: - __addrconf_sysctl_unregister(all); + __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: kfree(dflt); #endif @@ -6323,8 +6473,10 @@ err_alloc_all: static void __net_exit addrconf_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL - __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); - __addrconf_sysctl_unregister(net->ipv6.devconf_all); + __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt, + NETCONFA_IFINDEX_DEFAULT); + __addrconf_sysctl_unregister(net, net->ipv6.devconf_all, + NETCONFA_IFINDEX_ALL); #endif kfree(net->ipv6.devconf_dflt); kfree(net->ipv6.devconf_all); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index a8f6986dcbe5..6cb4ed91722a 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -413,7 +413,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) u32 label; int err = 0; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, NULL); if (err < 0) return err; @@ -532,7 +532,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct ip6addrlbl_entry *p; struct sk_buff *skb; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, NULL); if (err < 0) return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a9a9553ee63d..1635d218735e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1005,6 +1005,10 @@ static int __init inet6_init(void) if (err) goto seg6_fail; + err = igmp6_late_init(); + if (err) + goto igmp6_late_err; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1015,8 +1019,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - seg6_exit(); + igmp6_late_cleanup(); #endif +igmp6_late_err: + seg6_exit(); seg6_fail: calipso_exit(); calipso_fail: diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index ce1aae4a7fc8..b3df03e3faa0 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -146,8 +146,7 @@ static int ila_build_state(struct nlattr *nla, return -EINVAL; } - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, - ila_nl_policy); + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL); if (ret < 0) return ret; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacfb4bce153..b04539dd4629 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,6 +49,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + void (*edemux)(struct sk_buff *skb); + /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ @@ -60,8 +62,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && ipprot->early_demux) - ipprot->early_demux(skb); + if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) + edemux(skb); } if (!skb_valid_dst(skb)) ip6_route_input(skb); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6ba6c900ebcf..fb4546e80c82 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -815,7 +815,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) in6_dev = __in6_dev_get(dev); if (in6_dev) { in6_dev->cnf.mc_forwarding--; - inet6_netconf_notify_devconf(dev_net(dev), + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); } @@ -974,7 +974,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { in6_dev->cnf.mc_forwarding++; - inet6_netconf_notify_devconf(dev_net(dev), + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); } @@ -1599,7 +1599,8 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) write_unlock_bh(&mrt_lock); if (!err) - inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); rtnl_unlock(); @@ -1620,7 +1621,7 @@ int ip6mr_sk_done(struct sock *sk) mrt->mroute6_sk = NULL; net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); - inet6_netconf_notify_devconf(net, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1bdc703cb966..07403fa164e1 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2463,7 +2463,6 @@ static void mld_ifc_event(struct inet6_dev *idev) mld_ifc_start_timer(idev, 1); } - static void igmp6_timer_handler(unsigned long data) { struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; @@ -2599,6 +2598,44 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) write_unlock_bh(&idev->lock); } +static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) +{ + struct ifmcaddr6 *pmc; + + ASSERT_RTNL(); + + if (mld_in_v1_mode(idev)) { + read_lock_bh(&idev->lock); + for (pmc = idev->mc_list; pmc; pmc = pmc->next) + igmp6_join_group(pmc); + read_unlock_bh(&idev->lock); + } else + mld_send_report(idev, NULL); +} + +static int ipv6_mc_netdev_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct inet6_dev *idev = __in6_dev_get(dev); + + switch (event) { + case NETDEV_RESEND_IGMP: + if (idev) + ipv6_mc_rejoin_groups(idev); + break; + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block igmp6_netdev_notifier = { + .notifier_call = ipv6_mc_netdev_event, +}; + #ifdef CONFIG_PROC_FS struct igmp6_mc_iter_state { struct seq_net_private p; @@ -2970,7 +3007,17 @@ int __init igmp6_init(void) return register_pernet_subsys(&igmp6_net_ops); } +int __init igmp6_late_init(void) +{ + return register_netdevice_notifier(&igmp6_netdev_notifier); +} + void igmp6_cleanup(void) { unregister_pernet_subsys(&igmp6_net_ops); } + +void igmp6_late_cleanup(void) +{ + unregister_netdevice_notifier(&igmp6_netdev_notifier); +} diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7ebac630d3c6..b5812b3f7539 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -732,7 +732,7 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, struct ndisc_options *ndopts) { - neigh_update(neigh, lladdr, new, flags); + neigh_update(neigh, lladdr, new, flags, 0); /* report ndisc ops about neighbour update */ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts); } @@ -1418,6 +1418,8 @@ skip_linkparms: if (ri->prefix_len == 0 && !in6_dev->cnf.accept_ra_defrtr) continue; + if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 765facf03d45..e8d88d82636b 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -159,7 +159,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, nft_in(pkt)->ifindex); return; } diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 6c5b5b1830a7..4146536e9c15 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -27,10 +27,10 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, nft_out(pkt)); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index f5ac080fc084..a27e424f690d 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -26,10 +26,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min], - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max], + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index e3770abe688a..b5d54d4f995c 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -26,7 +26,7 @@ #include <net/protocol.h> #if IS_ENABLED(CONFIG_IPV6) -const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; +struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; EXPORT_SYMBOL(inet6_protos); int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 35c58b669ebd..ccde23eba702 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2906,7 +2906,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int pref; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, + NULL); if (err < 0) goto errout; @@ -3423,6 +3424,8 @@ static int rt6_fill_node(struct net *net, } else if (rt->rt6i_flags & RTF_LOCAL) rtm->rtm_type = RTN_LOCAL; + else if (rt->rt6i_flags & RTF_ANYCAST) + rtm->rtm_type = RTN_ANYCAST; else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) rtm->rtm_type = RTN_LOCAL; else @@ -3572,7 +3575,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct flowi6 fl6; int err, iif = 0, oif = 0; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, + NULL); if (err < 0) goto errout; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 85582257d3af..7436a4a62f3e 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -26,17 +26,13 @@ #include <linux/seg6_iptunnel.h> #include <net/addrconf.h> #include <net/ip6_route.h> -#ifdef CONFIG_DST_CACHE #include <net/dst_cache.h> -#endif #ifdef CONFIG_IPV6_SEG6_HMAC #include <net/seg6_hmac.h> #endif struct seg6_lwt { -#ifdef CONFIG_DST_CACHE struct dst_cache cache; -#endif struct seg6_iptunnel_encap tuninfo[0]; }; @@ -105,7 +101,7 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) hdrlen = (osrh->hdrlen + 1) << 3; tot_len = hdrlen + sizeof(*hdr); - err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + err = skb_cow_head(skb, tot_len); if (unlikely(err)) return err; @@ -156,7 +152,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) hdrlen = (osrh->hdrlen + 1) << 3; - err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + err = skb_cow_head(skb, hdrlen); if (unlikely(err)) return err; @@ -237,6 +233,9 @@ static int seg6_do_srh(struct sk_buff *skb) static int seg6_input(struct sk_buff *skb) { + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; int err; err = seg6_do_srh(skb); @@ -245,8 +244,26 @@ static int seg6_input(struct sk_buff *skb) return err; } + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + + preempt_disable(); + dst = dst_cache_get(&slwt->cache); + preempt_enable(); + skb_dst_drop(skb); - ip6_route_input(skb); + + if (!dst) { + ip6_route_input(skb); + dst = skb_dst(skb); + if (!dst->error) { + preempt_disable(); + dst_cache_set_ip6(&slwt->cache, dst, + &ipv6_hdr(skb)->saddr); + preempt_enable(); + } + } else { + skb_dst_set(skb, dst); + } return dst_input(skb); } @@ -264,11 +281,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); -#ifdef CONFIG_DST_CACHE preempt_disable(); dst = dst_cache_get(&slwt->cache); preempt_enable(); -#endif if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -287,11 +302,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } -#ifdef CONFIG_DST_CACHE preempt_disable(); dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); preempt_enable(); -#endif } skb_dst_drop(skb); @@ -315,7 +328,7 @@ static int seg6_build_state(struct nlattr *nla, int err; err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, - seg6_iptunnel_policy); + seg6_iptunnel_policy, NULL); if (err < 0) return err; @@ -355,13 +368,11 @@ static int seg6_build_state(struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); -#ifdef CONFIG_DST_CACHE err = dst_cache_init(&slwt->cache, GFP_KERNEL); if (err) { kfree(newts); return err; } -#endif memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); @@ -375,12 +386,10 @@ static int seg6_build_state(struct nlattr *nla, return 0; } -#ifdef CONFIG_DST_CACHE static void seg6_destroy_state(struct lwtunnel_state *lwt) { dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); } -#endif static int seg6_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) @@ -414,9 +423,7 @@ static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops seg6_iptun_ops = { .build_state = seg6_build_state, -#ifdef CONFIG_DST_CACHE .destroy_state = seg6_destroy_state, -#endif .output = seg6_output, .input = seg6_input, .fill_encap = seg6_fill_encap_info, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 49fa2e8c3fa9..8e42e8f54b70 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff) { - return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source, tsoff); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -265,11 +265,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL, NULL); - if (tcp_death_row->sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) - tcp_fetch_timewait_stamp(sk, dst); - icsk->icsk_ext_hdr_len = 0; if (opt) icsk->icsk_ext_hdr_len = opt->opt_flen + @@ -287,11 +282,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk_set_txhash(sk); if (likely(!tp->repair)) { - seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport, - &tp->tsoffset); + seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport, + &tp->tsoffset); if (!tp->write_seq) tp->write_seq = seq; } @@ -727,11 +722,8 @@ static void tcp_v6_init_req(struct request_sock *req, static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict) + const struct request_sock *req) { - if (strict) - *strict = true; return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } @@ -757,7 +749,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, - .init_seq = tcp_v6_init_sequence, + .init_seq_tsoff = tcp_v6_init_seq_and_tsoff, .send_synack = tcp_v6_send_synack, }; @@ -1301,8 +1293,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1933,8 +1923,9 @@ struct proto tcpv6_prot = { .diag_destroy = tcp_abort, }; -static const struct inet6_protocol tcpv6_protocol = { +static struct inet6_protocol tcpv6_protocol = { .early_demux = tcp_v6_early_demux, + .early_demux_handler = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4e4c401e3bc6..fd4b1c98a472 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -864,6 +864,64 @@ discard: return 0; } +static struct sock *__udp6_lib_demux_lookup(struct net *net, + __be16 loc_port, const struct in6_addr *loc_addr, + __be16 rmt_port, const struct in6_addr *rmt_addr, + int dif) +{ + struct sock *sk; + + rcu_read_lock(); + sk = __udp6_lib_lookup(net, rmt_addr, rmt_port, loc_addr, loc_port, + dif, &udp_table, NULL); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + rcu_read_unlock(); + + return sk; +} + +static void udp_v6_early_demux(struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + const struct udphdr *uh; + struct sock *sk; + struct dst_entry *dst; + int dif = skb->dev->ifindex; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct udphdr))) + return; + + uh = udp_hdr(skb); + + if (skb->pkt_type == PACKET_HOST) + sk = __udp6_lib_demux_lookup(net, uh->dest, + &ipv6_hdr(skb)->daddr, + uh->source, &ipv6_hdr(skb)->saddr, + dif); + else + return; + + if (!sk) + return; + + skb->sk = sk; + skb->destructor = sock_efree; + dst = READ_ONCE(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + if (dst) { + if (dst->flags & DST_NOCACHE) { + if (likely(atomic_inc_not_zero(&dst->__refcnt))) + skb_dst_set(skb, dst); + } else { + skb_dst_set_noref(skb, dst); + } + } +} + static __inline__ int udpv6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); @@ -1035,6 +1093,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = -1; ipc6.tclass = -1; ipc6.dontfrag = -1; + sockc.tsflags = sk->sk_tsflags; /* destination address check */ if (sin6) { @@ -1159,7 +1218,6 @@ do_udp_sendmsg: fl6.flowi6_mark = sk->sk_mark; fl6.flowi6_uid = sk->sk_uid; - sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; @@ -1378,7 +1436,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, } #endif -static const struct inet6_protocol udpv6_protocol = { +static struct inet6_protocol udpv6_protocol = { + .early_demux = udp_v6_early_demux, + .early_demux_handler = udp_v6_early_demux, .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 309062f3debe..31762f76cdb5 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_attach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_attach_ioctl(sock, &info); @@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_unattach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_unattach_ioctl(sock, &info); @@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct socket *newsock = NULL; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_clone(sock, &info, &newsock); diff --git a/net/key/af_key.c b/net/key/af_key.c index c6252ed42c1d..60cf2fb78d45 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3792,7 +3792,6 @@ static inline void pfkey_exit_proc(struct net *net) static struct xfrm_mgr pfkeyv2_mgr = { - .id = "pfkeyv2", .notify = pfkey_send_notify, .acquire = pfkey_send_acquire, .compile_policy = pfkey_compile_policy, diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8adab6335ced..fa0342574b89 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -120,7 +120,7 @@ static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) return sk->sk_user_data; } -static inline struct l2tp_net *l2tp_pernet(struct net *net) +static inline struct l2tp_net *l2tp_pernet(const struct net *net) { BUG_ON(!net); @@ -217,27 +217,6 @@ static void l2tp_tunnel_sock_put(struct sock *sk) sock_put(sk); } -/* Lookup a session by id in the global session list - */ -static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) -{ - struct l2tp_net *pn = l2tp_pernet(net); - struct hlist_head *session_list = - l2tp_session_id_hash_2(pn, session_id); - struct l2tp_session *session; - - rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, session_list, global_hlist) { - if (session->session_id == session_id) { - rcu_read_unlock_bh(); - return session; - } - } - rcu_read_unlock_bh(); - - return NULL; -} - /* Session hash list. * The session_id SHOULD be random according to RFC2661, but several * L2TP implementations (Cisco and Microsoft) use incrementing @@ -250,25 +229,46 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; } -/* Lookup a session by id +/* Lookup a session. A new reference is held on the returned session. + * Optionally calls session->ref() too if do_ref is true. */ -struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id) +struct l2tp_session *l2tp_session_get(const struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref) { struct hlist_head *session_list; struct l2tp_session *session; - /* In L2TPv3, session_ids are unique over all tunnels and we - * sometimes need to look them up before we know the - * tunnel. - */ - if (tunnel == NULL) - return l2tp_session_find_2(net, session_id); + if (!tunnel) { + struct l2tp_net *pn = l2tp_pernet(net); + + session_list = l2tp_session_id_hash_2(pn, session_id); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(session, session_list, global_hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + rcu_read_unlock_bh(); + + return session; + } + } + rcu_read_unlock_bh(); + + return NULL; + } session_list = l2tp_session_id_hash(tunnel, session_id); read_lock_bh(&tunnel->hlist_lock); hlist_for_each_entry(session, session_list, hlist) { if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); read_unlock_bh(&tunnel->hlist_lock); + return session; } } @@ -276,9 +276,10 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find); +EXPORT_SYMBOL_GPL(l2tp_session_get); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref) { int hash; struct l2tp_session *session; @@ -288,6 +289,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -298,12 +302,14 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_nth); +EXPORT_SYMBOL_GPL(l2tp_session_get_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) +struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, + const char *ifname, + bool do_ref) { struct l2tp_net *pn = l2tp_pernet(net); int hash; @@ -313,7 +319,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); rcu_read_unlock_bh(); + return session; } } @@ -323,11 +333,53 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); + +static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, + struct l2tp_session *session) +{ + struct l2tp_session *session_walk; + struct hlist_head *g_head; + struct hlist_head *head; + struct l2tp_net *pn; + + head = l2tp_session_id_hash(tunnel, session->session_id); + + write_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session_walk, head, hlist) + if (session_walk->session_id == session->session_id) + goto exist; + + if (tunnel->version == L2TP_HDR_VER_3) { + pn = l2tp_pernet(tunnel->l2tp_net); + g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net), + session->session_id); + + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_for_each_entry(session_walk, g_head, global_hlist) + if (session_walk->session_id == session->session_id) + goto exist_glob; + + hlist_add_head_rcu(&session->global_hlist, g_head); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + } + + hlist_add_head(&session->hlist, head); + write_unlock_bh(&tunnel->hlist_lock); + + return 0; + +exist_glob: + spin_unlock_bh(&pn->l2tp_session_hlist_lock); +exist: + write_unlock_bh(&tunnel->hlist_lock); + + return -EEXIST; +} /* Lookup a tunnel by id */ -struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) +struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id) { struct l2tp_tunnel *tunnel; struct l2tp_net *pn = l2tp_pernet(net); @@ -345,7 +397,7 @@ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) } EXPORT_SYMBOL_GPL(l2tp_tunnel_find); -struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth) +struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel; @@ -633,6 +685,9 @@ discard: * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. + * + * session->ref() must have been called prior to l2tp_recv_common(). + * session->deref() will be called automatically after skb is processed. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, @@ -642,14 +697,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int offset; u32 ns, nr; - /* The ref count is increased since we now hold a pointer to - * the session. Take care to decrement the refcnt when exiting - * this function from now on... - */ - l2tp_session_inc_refcount(session); - if (session->ref) - (*session->ref)(session); - /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { @@ -802,8 +849,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Try to dequeue as many skbs from reorder_q as we can. */ l2tp_recv_dequeue(session); - l2tp_session_dec_refcount(session); - return; discard: @@ -812,8 +857,6 @@ discard: if (session->deref) (*session->deref)(session); - - l2tp_session_dec_refcount(session); } EXPORT_SYMBOL(l2tp_recv_common); @@ -920,8 +963,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } /* Find the session context */ - session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); + session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); if (!session || !session->recv_skb) { + if (session) { + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } + /* Not found? Pass to userspace to deal with */ l2tp_info(tunnel, L2TP_MSG_DATA, "%s: no session found (%u/%u). Passing up.\n", @@ -930,6 +979,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -1738,6 +1788,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; + int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1793,6 +1844,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_set_header_len(session, tunnel->version); + err = l2tp_session_add_to_tunnel(tunnel, session); + if (err) { + kfree(session); + + return ERR_PTR(err); + } + /* Bump the reference count. The session context is deleted * only when this drops to zero. */ @@ -1802,28 +1860,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn /* Ensure tunnel socket isn't deleted */ sock_hold(tunnel->sock); - /* Add session to the tunnel's hash list */ - write_lock_bh(&tunnel->hlist_lock); - hlist_add_head(&session->hlist, - l2tp_session_id_hash(tunnel, session_id)); - write_unlock_bh(&tunnel->hlist_lock); - - /* And to the global session list if L2TPv3 */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_add_head_rcu(&session->global_hlist, - l2tp_session_id_hash_2(pn, session_id)); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - } - /* Ignore management session in session count value */ if (session->session_id != 0) atomic_inc(&l2tp_session_count); + + return session; } - return session; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(l2tp_session_create); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index aebf281d09ee..eec5ad2ebb93 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -230,13 +230,16 @@ out: return tunnel; } -struct l2tp_session *l2tp_session_find(struct net *net, - struct l2tp_tunnel *tunnel, - u32 session_id); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); -struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); -struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); +struct l2tp_session *l2tp_session_get(const struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref); +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref); +struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, + const char *ifname, + bool do_ref); +struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id); +struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth); int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 2d6760a2ae34..d100aed3d06f 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) } /* Show the tunnel or session context */ - if (pd->session == NULL) + if (!pd->session) { l2tp_dfs_seq_tunnel_show(m, pd->tunnel); - else + } else { l2tp_dfs_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8bf18a5f66e0..138566a63123 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -30,6 +30,9 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> #include "l2tp_core.h" @@ -204,6 +207,53 @@ static void l2tp_eth_show(struct seq_file *m, void *arg) } #endif +static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel, + struct l2tp_session *session, + struct net_device *dev) +{ + unsigned int overhead = 0; + struct dst_entry *dst; + u32 l3_overhead = 0; + + /* if the encap is UDP, account for UDP header size */ + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { + overhead += sizeof(struct udphdr); + dev->needed_headroom += sizeof(struct udphdr); + } + if (session->mtu != 0) { + dev->mtu = session->mtu; + dev->needed_headroom += session->hdr_len; + return; + } + l3_overhead = kernel_sock_ip_overhead(tunnel->sock); + if (l3_overhead == 0) { + /* L3 Overhead couldn't be identified, this could be + * because tunnel->sock was NULL or the socket's + * address family was not IPv4 or IPv6, + * dev mtu stays at 1500. + */ + return; + } + /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr + * UDP overhead, if any, was already factored in above. + */ + overhead += session->hdr_len + ETH_HLEN + l3_overhead; + + /* If PMTU discovery was enabled, use discovered MTU on L2TP device */ + dst = sk_dst_get(tunnel->sock); + if (dst) { + /* dst_mtu will use PMTU if found, else fallback to intf MTU */ + u32 pmtu = dst_mtu(dst); + + if (pmtu != 0) + dev->mtu = pmtu; + dst_release(dst); + } + session->mtu = dev->mtu - overhead; + dev->mtu = session->mtu; + dev->needed_headroom += session->hdr_len; +} + static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct net_device *dev; @@ -221,12 +271,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - session = l2tp_session_find(net, tunnel, session_id); - if (session) { - rc = -EEXIST; - goto out; - } - if (cfg->ifname) { dev = dev_get_by_name(net, cfg->ifname); if (dev) { @@ -240,8 +284,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p session = l2tp_session_create(sizeof(*spriv), tunnel, session_id, peer_session_id, cfg); - if (!session) { - rc = -ENOMEM; + if (IS_ERR(session)) { + rc = PTR_ERR(session); goto out; } @@ -253,12 +297,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p } dev_net_set(dev, net); - if (session->mtu == 0) - session->mtu = dev->mtu - session->hdr_len; - dev->mtu = session->mtu; - dev->needed_headroom += session->hdr_len; dev->min_mtu = 0; dev->max_mtu = ETH_MAX_MTU; + l2tp_eth_adjust_mtu(tunnel, session, dev); priv = netdev_priv(dev); priv->dev = dev; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index d25038cfd64e..4d322c1b7233 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -178,9 +179,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); @@ -202,6 +204,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a4abcbc4c09a..88b397c30d86 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); + return 0; pass_up: @@ -191,9 +193,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); @@ -215,6 +218,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 3620fba31786..12cfcd0ca807 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; -static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) +static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, + bool do_ref) { u32 tunnel_id; u32 session_id; @@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); - session = l2tp_session_find_by_ifname(net, ifname); + session = l2tp_session_get_by_ifname(net, ifname, do_ref); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel) - session = l2tp_session_find(net, tunnel, session_id); + session = l2tp_session_get(net, tunnel, session_id, + do_ref); } return session; @@ -519,11 +521,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf goto out; } session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); - session = l2tp_session_find(net, tunnel, session_id); - if (session) { - ret = -EEXIST; - goto out; - } if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) { ret = -EINVAL; @@ -642,10 +639,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf session_id, peer_session_id, &cfg); if (ret >= 0) { - session = l2tp_session_find(net, tunnel, session_id); - if (session) + session = l2tp_session_get(net, tunnel, session_id, false); + if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); + l2tp_session_dec_refcount(session); + } } out: @@ -658,7 +657,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; u16 pw_type; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, true); if (session == NULL) { ret = -ENODEV; goto out; @@ -672,6 +671,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -681,7 +684,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf int ret = 0; struct l2tp_session *session; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; goto out; @@ -716,6 +719,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_MODIFY); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -811,29 +816,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int ret; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; - goto out; + goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto out; + goto err_ref; } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) - goto err_out; + goto err_ref_msg; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); + ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); -err_out: - nlmsg_free(msg); + l2tp_session_dec_refcount(session); -out: + return ret; + +err_ref_msg: + nlmsg_free(msg); +err_ref: + l2tp_session_dec_refcount(session); +err: return ret; } @@ -852,7 +862,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_find_nth(tunnel, si); + session = l2tp_session_get_nth(tunnel, si, false); if (session == NULL) { ti++; tunnel = NULL; @@ -862,8 +872,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session, L2TP_CMD_SESSION_GET) < 0) + session, L2TP_CMD_SESSION_GET) < 0) { + l2tp_session_dec_refcount(session); break; + } + l2tp_session_dec_refcount(session); si++; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 36cc56fd0418..32ea0f3d868c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session) static void pppol2tp_session_destruct(struct sock *sk) { struct l2tp_session *session = sk->sk_user_data; + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); @@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock) l2tp_session_queue_purge(session); sock_put(sk); } - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - release_sock(sk); /* This will delete the session context via @@ -582,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, int error = 0; u32 tunnel_id, peer_tunnel_id; u32 session_id, peer_session_id; + bool drop_refcnt = false; int ver = 2; int fd; @@ -683,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = peer_tunnel_id; - /* Create session if it doesn't already exist. We handle the - * case where a session was previously created by the netlink - * interface by checking that the session doesn't already have - * a socket and its tunnel socket are what we expect. If any - * of those checks fail, return EEXIST to the caller. - */ - session = l2tp_session_find(sock_net(sk), tunnel, session_id); - if (session == NULL) { - /* Default MTU must allow space for UDP/L2TP/PPP - * headers. + session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); + if (session) { + drop_refcnt = true; + ps = l2tp_session_priv(session); + + /* Using a pre-existing session is fine as long as it hasn't + * been connected yet. */ - cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + if (ps->sock) { + error = -EEXIST; + goto end; + } - /* Allocate and initialize a new session context. */ - session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, session_id, - peer_session_id, &cfg); - if (session == NULL) { - error = -ENOMEM; + /* consistency checks */ + if (ps->tunnel_sock != tunnel->sock) { + error = -EEXIST; goto end; } } else { - ps = l2tp_session_priv(session); - error = -EEXIST; - if (ps->sock != NULL) - goto end; + /* Default MTU must allow space for UDP/L2TP/PPP headers */ + cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; + cfg.mru = cfg.mtu; - /* consistency checks */ - if (ps->tunnel_sock != tunnel->sock) + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, session_id, + peer_session_id, &cfg); + if (IS_ERR(session)) { + error = PTR_ERR(session); goto end; + } } /* Associate session with its PPPoL2TP socket */ @@ -777,6 +779,8 @@ out_no_ppp: session->name); end: + if (drop_refcnt) + l2tp_session_dec_refcount(session); release_sock(sk); return error; @@ -804,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i if (tunnel->sock == NULL) goto out; - /* Check that this session doesn't already exist */ - error = -EEXIST; - session = l2tp_session_find(net, tunnel, session_id); - if (session != NULL) - goto out; - /* Default MTU values. */ if (cfg->mtu == 0) cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; @@ -817,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i cfg->mru = cfg->mtu; /* Allocate and initialize a new session context. */ - error = -ENOMEM; session = l2tp_session_create(sizeof(struct pppol2tp_session), tunnel, session_id, peer_session_id, cfg); - if (session == NULL) + if (IS_ERR(session)) { + error = PTR_ERR(session); goto out; + } ps = l2tp_session_priv(session); ps->tunnel_sock = tunnel->sock; @@ -1140,11 +1139,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, if (stats.session_id != 0) { /* resend to session ioctl handler */ struct l2tp_session *session = - l2tp_session_find(sock_net(sk), tunnel, stats.session_id); - if (session != NULL) - err = pppol2tp_session_ioctl(session, cmd, arg); - else + l2tp_session_get(sock_net(sk), tunnel, + stats.session_id, true); + + if (session) { + err = pppol2tp_session_ioctl(session, cmd, + arg); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } else { err = -EBADR; + } break; } #ifdef CONFIG_XFRM @@ -1377,8 +1383,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, } else err = pppol2tp_session_setsockopt(sk, session, optname, val); - err = 0; - end_put_sess: sock_put(sk); end: @@ -1501,8 +1505,13 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); sock_put(ps->tunnel_sock); - } else + if (err) + goto end_put_sess; + } else { err = pppol2tp_session_getsockopt(sk, session, optname, &val); + if (err) + goto end_put_sess; + } err = -EFAULT; if (put_user(len, optlen)) @@ -1554,7 +1563,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -1681,10 +1690,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) /* Show the tunnel or session context. */ - if (pd->session == NULL) + if (!pd->session) { pppol2tp_seq_tunnel_show(m, pd->tunnel); - else + } else { pppol2tp_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; @@ -1843,4 +1856,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); -MODULE_ALIAS_L2TP_PWTYPE(11); +MODULE_ALIAS_L2TP_PWTYPE(7); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 40813dd3301c..5bb0c5012819 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local); if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + local->ops->wake_tx_queue) { /* XXX: for AP_VLAN, actually track AP queues */ netif_tx_start_all_queues(dev); } else if (dev) { diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 56ccffa3f2bf..62141dcec2d6 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -19,6 +19,7 @@ #ifndef __IEEE802154_I_H #define __IEEE802154_I_H +#include <linux/interrupt.h> #include <linux/mutex.h> #include <linux/hrtimer.h> #include <net/cfg802154.h> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 33211f9a2656..07181d2273e1 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,6 +24,9 @@ #include <net/nexthop.h> #include "internal.h" +/* max memory we will use for mpls_route */ +#define MAX_MPLS_ROUTE_MEM 4096 + /* Maximum number of labels to look ahead at when selecting a path of * a multipath route */ @@ -32,7 +35,9 @@ #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1) static int zero = 0; +static int one = 1; static int label_limit = (1 << 20) - 1; +static int ttl_max = 255; static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, struct nlmsghdr *nlh, struct net *net, u32 portid, @@ -58,10 +63,7 @@ EXPORT_SYMBOL_GPL(mpls_output_possible); static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) { - u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); - int nh_index = nh - rt->rt_nh; - - return nh0_via + rt->rt_max_alen * nh_index; + return (u8 *)nh + rt->rt_via_offset; } static const u8 *mpls_nh_via(const struct mpls_route *rt, @@ -187,21 +189,32 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) return hash; } +static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index) +{ + return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size); +} + +/* number of alive nexthops (rt->rt_nhn_alive) and the flags for + * a next hop (nh->nh_flags) are modified by netdev event handlers. + * Since those fields can change at any moment, use READ_ONCE to + * access both. + */ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, struct sk_buff *skb) { - int alive = ACCESS_ONCE(rt->rt_nhn_alive); u32 hash = 0; int nh_index = 0; int n = 0; + u8 alive; /* No need to look further into packet if there's only * one path */ if (rt->rt_nhn == 1) - goto out; + return rt->rt_nh; - if (alive <= 0) + alive = READ_ONCE(rt->rt_nhn_alive); + if (alive == 0) return NULL; hash = mpls_multipath_hash(rt, skb); @@ -209,7 +222,9 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, if (alive == rt->rt_nhn) goto out; for_nexthops(rt) { - if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + unsigned int nh_flags = READ_ONCE(nh->nh_flags); + + if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) continue; if (n == nh_index) return nh; @@ -217,11 +232,11 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } endfor_nexthops(rt); out: - return &rt->rt_nh[nh_index]; + return mpls_get_nexthop(rt, nh_index); } -static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, - struct mpls_entry_decoded dec) +static bool mpls_egress(struct net *net, struct mpls_route *rt, + struct sk_buff *skb, struct mpls_entry_decoded dec) { enum mpls_payload_type payload_type; bool success = false; @@ -246,22 +261,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, switch (payload_type) { case MPT_IPV4: { struct iphdr *hdr4 = ip_hdr(skb); + u8 new_ttl; skb->protocol = htons(ETH_P_IP); + + /* If propagating TTL, take the decremented TTL from + * the incoming MPLS header, otherwise decrement the + * TTL, but only if not 0 to avoid underflow. + */ + if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED || + (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT && + net->mpls.ip_ttl_propagate)) + new_ttl = dec.ttl; + else + new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0; + csum_replace2(&hdr4->check, htons(hdr4->ttl << 8), - htons(dec.ttl << 8)); - hdr4->ttl = dec.ttl; + htons(new_ttl << 8)); + hdr4->ttl = new_ttl; success = true; break; } case MPT_IPV6: { struct ipv6hdr *hdr6 = ipv6_hdr(skb); skb->protocol = htons(ETH_P_IPV6); - hdr6->hop_limit = dec.ttl; + + /* If propagating TTL, take the decremented TTL from + * the incoming MPLS header, otherwise decrement the + * hop limit, but only if not 0 to avoid underflow. + */ + if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED || + (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT && + net->mpls.ip_ttl_propagate)) + hdr6->hop_limit = dec.ttl; + else if (hdr6->hop_limit) + hdr6->hop_limit = hdr6->hop_limit - 1; success = true; break; } case MPT_UNSPEC: + /* Should have decided which protocol it is by now */ break; } @@ -361,7 +400,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (unlikely(!new_header_size && dec.bos)) { /* Penultimate hop popping */ - if (!mpls_egress(rt, skb, dec)) + if (!mpls_egress(dev_net(out_dev), rt, skb, dec)) goto err; } else { bool bos; @@ -412,6 +451,7 @@ static struct packet_type mpls_packet_type __read_mostly = { static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { [RTA_DST] = { .type = NLA_U32 }, [RTA_OIF] = { .type = NLA_U32 }, + [RTA_TTL_PROPAGATE] = { .type = NLA_U8 }, }; struct mpls_route_config { @@ -421,6 +461,7 @@ struct mpls_route_config { u8 rc_via_alen; u8 rc_via[MAX_VIA_ALEN]; u32 rc_label; + u8 rc_ttl_propagate; u8 rc_output_labels; u32 rc_output_label[MAX_NEW_LABELS]; u32 rc_nlflags; @@ -430,20 +471,27 @@ struct mpls_route_config { int rc_mp_len; }; -static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) +/* all nexthops within a route have the same size based on max + * number of labels and max via length for a hop + */ +static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels) { - u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); + u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen); struct mpls_route *rt; + size_t size; - rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh), - VIA_ALEN_ALIGN) + - num_nh * max_alen_aligned, - GFP_KERNEL); - if (rt) { - rt->rt_nhn = num_nh; - rt->rt_nhn_alive = num_nh; - rt->rt_max_alen = max_alen_aligned; - } + size = sizeof(*rt) + num_nh * nh_size; + if (size > MAX_MPLS_ROUTE_MEM) + return ERR_PTR(-EINVAL); + + rt = kzalloc(size, GFP_KERNEL); + if (!rt) + return ERR_PTR(-ENOMEM); + + rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; + rt->rt_nh_size = nh_size; + rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels); return rt; } @@ -648,9 +696,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, return -ENOMEM; err = -EINVAL; - /* Ensure only a supported number of labels are present */ - if (cfg->rc_output_labels > MAX_NEW_LABELS) - goto errout; nh->nh_labels = cfg->rc_output_labels; for (i = 0; i < nh->nh_labels; i++) @@ -675,7 +720,7 @@ errout: static int mpls_nh_build(struct net *net, struct mpls_route *rt, struct mpls_nh *nh, int oif, struct nlattr *via, - struct nlattr *newdst) + struct nlattr *newdst, u8 max_labels) { int err = -ENOMEM; @@ -683,7 +728,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, goto errout; if (newdst) { - err = nla_get_labels(newdst, MAX_NEW_LABELS, + err = nla_get_labels(newdst, max_labels, &nh->nh_labels, nh->nh_label); if (err) goto errout; @@ -708,22 +753,20 @@ errout: return err; } -static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, - u8 cfg_via_alen, u8 *max_via_alen) +static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, + u8 cfg_via_alen, u8 *max_via_alen, + u8 *max_labels) { - int nhs = 0; int remaining = len; - - if (!rtnh) { - *max_via_alen = cfg_via_alen; - return 1; - } + u8 nhs = 0; *max_via_alen = 0; + *max_labels = 0; while (rtnh_ok(rtnh, remaining)) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); int attrlen; + u8 n_labels = 0; attrlen = rtnh_attrlen(rtnh); nla = nla_find(attrs, attrlen, RTA_VIA); @@ -737,7 +780,20 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, via_alen); } + nla = nla_find(attrs, attrlen, RTA_NEWDST); + if (nla && + nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0) + return 0; + + *max_labels = max_t(u8, *max_labels, n_labels); + + /* number of nexthops is tracked by a u8. + * Check for overflow. + */ + if (nhs == 255) + return 0; nhs++; + rtnh = rtnh_next(rtnh, &remaining); } @@ -746,13 +802,13 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, } static int mpls_nh_build_multi(struct mpls_route_config *cfg, - struct mpls_route *rt) + struct mpls_route *rt, u8 max_labels) { struct rtnexthop *rtnh = cfg->rc_mp; struct nlattr *nla_via, *nla_newdst; int remaining = cfg->rc_mp_len; - int nhs = 0; int err = 0; + u8 nhs = 0; change_nexthops(rt) { int attrlen; @@ -779,7 +835,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, } err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, nla_newdst); + rtnh->rtnh_ifindex, nla_via, nla_newdst, + max_labels); if (err) goto errout; @@ -806,7 +863,8 @@ static int mpls_route_add(struct mpls_route_config *cfg) int err = -EINVAL; u8 max_via_alen; unsigned index; - int nhs; + u8 max_labels; + u8 nhs; index = cfg->rc_label; @@ -844,21 +902,32 @@ static int mpls_route_add(struct mpls_route_config *cfg) goto errout; err = -EINVAL; - nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, - cfg->rc_via_alen, &max_via_alen); + if (cfg->rc_mp) { + nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, + cfg->rc_via_alen, &max_via_alen, + &max_labels); + } else { + max_via_alen = cfg->rc_via_alen; + max_labels = cfg->rc_output_labels; + nhs = 1; + } + if (nhs == 0) goto errout; err = -ENOMEM; - rt = mpls_rt_alloc(nhs, max_via_alen); - if (!rt) + rt = mpls_rt_alloc(nhs, max_via_alen, max_labels); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); goto errout; + } rt->rt_protocol = cfg->rc_protocol; rt->rt_payload_type = cfg->rc_payload_type; + rt->rt_ttl_propagate = cfg->rc_ttl_propagate; if (cfg->rc_mp) - err = mpls_nh_build_multi(cfg, rt); + err = mpls_nh_build_multi(cfg, rt, max_labels); else err = mpls_nh_build_from_cfg(cfg, rt); if (err) @@ -1011,8 +1080,8 @@ static int mpls_netconf_msgsize_devconf(int type) return size; } -static void mpls_netconf_notify_devconf(struct net *net, int type, - struct mpls_dev *mdev) +static void mpls_netconf_notify_devconf(struct net *net, int event, + int type, struct mpls_dev *mdev) { struct sk_buff *skb; int err = -ENOBUFS; @@ -1021,8 +1090,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int type, if (!skb) goto errout; - err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF, - 0, type); + err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -1054,7 +1122,7 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb, int err; err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_mpls_policy); + devconf_mpls_policy, NULL); if (err < 0) goto errout; @@ -1155,9 +1223,8 @@ static int mpls_conf_proc(struct ctl_table *ctl, int write, if (i == offsetof(struct mpls_dev, input_enabled) && val != oval) { - mpls_netconf_notify_devconf(net, - NETCONFA_INPUT, - mdev); + mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_INPUT, mdev); } } @@ -1198,10 +1265,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev, snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); - mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); + mdev->sysctl = register_net_sysctl(net, path, table); if (!mdev->sysctl) goto free; + mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev); return 0; free: @@ -1210,13 +1278,17 @@ out: return -ENOBUFS; } -static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) +static void mpls_dev_sysctl_unregister(struct net_device *dev, + struct mpls_dev *mdev) { + struct net *net = dev_net(dev); struct ctl_table *table; table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); + + mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev); } static struct mpls_dev *mpls_add_dev(struct net_device *dev) @@ -1242,11 +1314,12 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev) u64_stats_init(&mpls_stats->syncp); } + mdev->dev = dev; + err = mpls_dev_sysctl_register(dev, mdev); if (err) goto free; - mdev->dev = dev; rcu_assign_pointer(dev->mpls_ptr, mdev); return mdev; @@ -1269,6 +1342,7 @@ static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); + u8 alive, deleted; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -1278,32 +1352,49 @@ static void mpls_ifdown(struct net_device *dev, int event) if (!rt) continue; + alive = 0; + deleted = 0; change_nexthops(rt) { + unsigned int nh_flags = nh->nh_flags; + if (rtnl_dereference(nh->nh_dev) != dev) - continue; + goto next; + switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nh->nh_flags |= RTNH_F_DEAD; + nh_flags |= RTNH_F_DEAD; /* fall through */ case NETDEV_CHANGE: - nh->nh_flags |= RTNH_F_LINKDOWN; - if (event != NETDEV_UNREGISTER) - ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; + nh_flags |= RTNH_F_LINKDOWN; break; } if (event == NETDEV_UNREGISTER) RCU_INIT_POINTER(nh->nh_dev, NULL); + + if (nh->nh_flags != nh_flags) + WRITE_ONCE(nh->nh_flags, nh_flags); +next: + if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) + alive++; + if (!rtnl_dereference(nh->nh_dev)) + deleted++; } endfor_nexthops(rt); + + WRITE_ONCE(rt->rt_nhn_alive, alive); + + /* if there are no more nexthops, delete the route */ + if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn) + mpls_route_update(net, index, NULL, NULL); } } -static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +static void mpls_ifup(struct net_device *dev, unsigned int flags) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); unsigned index; - int alive; + u8 alive; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { @@ -1314,20 +1405,22 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) alive = 0; change_nexthops(rt) { + unsigned int nh_flags = nh->nh_flags; struct net_device *nh_dev = rtnl_dereference(nh->nh_dev); - if (!(nh->nh_flags & nh_flags)) { + if (!(nh_flags & flags)) { alive++; continue; } if (nh_dev != dev) continue; alive++; - nh->nh_flags &= ~nh_flags; + nh_flags &= ~flags; + WRITE_ONCE(nh->nh_flags, flags); } endfor_nexthops(rt); - ACCESS_ONCE(rt->rt_nhn_alive) = alive; + WRITE_ONCE(rt->rt_nhn_alive, alive); } } @@ -1378,7 +1471,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, mpls_ifdown(dev, event); mdev = mpls_dev_get(dev); if (mdev) { - mpls_dev_sysctl_unregister(mdev); + mpls_dev_sysctl_unregister(dev, mdev); RCU_INIT_POINTER(dev->mpls_ptr, NULL); call_rcu(&mdev->rcu, mpls_dev_destroy_rcu); } @@ -1388,7 +1481,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (mdev) { int err; - mpls_dev_sysctl_unregister(mdev); + mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); if (err) return notifier_from_errno(err); @@ -1448,16 +1541,18 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, EXPORT_SYMBOL_GPL(nla_put_labels); int nla_get_labels(const struct nlattr *nla, - u32 max_labels, u8 *labels, u32 label[]) + u8 max_labels, u8 *labels, u32 label[]) { unsigned len = nla_len(nla); - unsigned nla_labels; struct mpls_shim_hdr *nla_label; + u8 nla_labels; bool bos; int i; - /* len needs to be an even multiple of 4 (the label size) */ - if (len & 3) + /* len needs to be an even multiple of 4 (the label size). Number + * of labels is a u8 so check for overflow. + */ + if (len & 3 || len / 4 > 255) return -EINVAL; /* Limit the number of new labels allowed */ @@ -1465,6 +1560,10 @@ int nla_get_labels(const struct nlattr *nla, if (nla_labels > max_labels) return -EINVAL; + /* when label == NULL, caller wants number of labels */ + if (!label) + goto out; + nla_label = nla_data(nla); bos = true; for (i = nla_labels - 1; i >= 0; i--, bos = false) { @@ -1488,6 +1587,7 @@ int nla_get_labels(const struct nlattr *nla, label[i] = dec.label; } +out: *labels = nla_labels; return 0; } @@ -1543,13 +1643,13 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, int index; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy, + NULL); if (err < 0) goto errout; err = -EINVAL; rtm = nlmsg_data(nlh); - memset(cfg, 0, sizeof(*cfg)); if (rtm->rtm_family != AF_MPLS) goto errout; @@ -1577,6 +1677,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->rc_label = LABEL_NOT_SPECIFIED; cfg->rc_protocol = rtm->rtm_protocol; cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC; + cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT; cfg->rc_nlflags = nlh->nlmsg_flags; cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->rc_nlinfo.nlh = nlh; @@ -1623,6 +1724,17 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->rc_mp_len = nla_len(nla); break; } + case RTA_TTL_PROPAGATE: + { + u8 ttl_propagate = nla_get_u8(nla); + + if (ttl_propagate > 1) + goto errout; + cfg->rc_ttl_propagate = ttl_propagate ? + MPLS_TTL_PROP_ENABLED : + MPLS_TTL_PROP_DISABLED; + break; + } default: /* Unsupported attribute */ goto errout; @@ -1636,27 +1748,43 @@ errout: static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct mpls_route_config cfg; + struct mpls_route_config *cfg; int err; - err = rtm_to_route_config(skb, nlh, &cfg); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = rtm_to_route_config(skb, nlh, cfg); if (err < 0) - return err; + goto out; - return mpls_route_del(&cfg); + err = mpls_route_del(cfg); +out: + kfree(cfg); + + return err; } static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct mpls_route_config cfg; + struct mpls_route_config *cfg; int err; - err = rtm_to_route_config(skb, nlh, &cfg); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = rtm_to_route_config(skb, nlh, cfg); if (err < 0) - return err; + goto out; - return mpls_route_add(&cfg); + err = mpls_route_add(cfg); +out: + kfree(cfg); + + return err; } static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, @@ -1683,6 +1811,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, if (nla_put_labels(skb, RTA_DST, 1, &label)) goto nla_put_failure; + + if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) { + bool ttl_propagate = + rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED; + + if (nla_put_u8(skb, RTA_TTL_PROPAGATE, + ttl_propagate)) + goto nla_put_failure; + } if (rt->rt_nhn == 1) { const struct mpls_nh *nh = rt->rt_nh; @@ -1704,21 +1841,23 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, } else { struct rtnexthop *rtnh; struct nlattr *mp; - int dead = 0; - int linkdown = 0; + u8 linkdown = 0; + u8 dead = 0; mp = nla_nest_start(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; for_nexthops(rt) { + dev = rtnl_dereference(nh->nh_dev); + if (!dev) + continue; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; - dev = rtnl_dereference(nh->nh_dev); - if (dev) - rtnh->rtnh_ifindex = dev->ifindex; + rtnh->rtnh_ifindex = dev->ifindex; if (nh->nh_flags & RTNH_F_LINKDOWN) { rtnh->rtnh_flags |= RTNH_F_LINKDOWN; linkdown++; @@ -1793,7 +1932,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) - + nla_total_size(4); /* RTA_DST */ + + nla_total_size(4) /* RTA_DST */ + + nla_total_size(1); /* RTA_TTL_PROPAGATE */ if (rt->rt_nhn == 1) { struct mpls_nh *nh = rt->rt_nh; @@ -1809,6 +1949,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) size_t nhsize = 0; for_nexthops(rt) { + if (!rtnl_dereference(nh->nh_dev)) + continue; nhsize += nla_total_size(sizeof(struct rtnexthop)); /* RTA_VIA */ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) @@ -1871,12 +2013,13 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; - rt0 = mpls_rt_alloc(1, lo->addr_len); - if (!rt0) + rt0 = mpls_rt_alloc(1, lo->addr_len, 0); + if (IS_ERR(rt0)) goto nort0; RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_payload_type = MPT_IPV4; + rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE; rt0->rt_nh->nh_via_alen = lo->addr_len; memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr, @@ -1884,12 +2027,13 @@ static int resize_platform_label_table(struct net *net, size_t limit) } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; - rt2 = mpls_rt_alloc(1, lo->addr_len); - if (!rt2) + rt2 = mpls_rt_alloc(1, lo->addr_len, 0); + if (IS_ERR(rt2)) goto nort2; RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; + rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE; rt2->rt_nh->nh_via_alen = lo->addr_len; memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr, @@ -1971,6 +2115,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write, return ret; } +#define MPLS_NS_SYSCTL_OFFSET(field) \ + (&((struct net *)0)->field) + static const struct ctl_table mpls_table[] = { { .procname = "platform_labels", @@ -1979,21 +2126,47 @@ static const struct ctl_table mpls_table[] = { .mode = 0644, .proc_handler = mpls_platform_labels, }, + { + .procname = "ip_ttl_propagate", + .data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate), + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "default_ttl", + .data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl), + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &ttl_max, + }, { } }; static int mpls_net_init(struct net *net) { struct ctl_table *table; + int i; net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; + net->mpls.ip_ttl_propagate = 1; + net->mpls.default_ttl = 255; table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); if (table == NULL) return -ENOMEM; - table[0].data = net; + /* Table data contains only offsets relative to the base of + * the mdev at this point, so make them absolute. + */ + for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++) + table[i].data = (char *)net + (uintptr_t)table[i].data; + net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); if (net->mpls.ctl == NULL) { kfree(table); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 76360d8b9579..4db6a5971322 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -2,6 +2,11 @@ #define MPLS_INTERNAL_H #include <net/mpls.h> +/* put a reasonable limit on the number of labels + * we will accept from userspace + */ +#define MAX_NEW_LABELS 30 + struct mpls_entry_decoded { u32 label; u8 ttl; @@ -64,7 +69,6 @@ struct mpls_dev { struct sk_buff; #define LABEL_NOT_SPECIFIED (1 << 20) -#define MAX_NEW_LABELS 2 /* This maximum ha length copied from the definition of struct neighbour */ #define VIA_ALEN_ALIGN sizeof(unsigned long) @@ -83,11 +87,35 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + + /* nh_flags is accessed under RCU in the packet path; it is + * modified handling netdev events with rtnl lock held + */ unsigned int nh_flags; - u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; u8 nh_via_table; + u8 nh_reserved1; + + u32 nh_label[0]; +}; + +/* offset of via from beginning of mpls_nh */ +#define MPLS_NH_VIA_OFF(num_labels) \ + ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \ + VIA_ALEN_ALIGN) + +/* all nexthops within a route have the same size based on the + * max number of labels and max via length across all nexthops + */ +#define MPLS_NH_SIZE(num_labels, max_via_alen) \ + (MPLS_NH_VIA_OFF((num_labels)) + \ + ALIGN((max_via_alen), VIA_ALEN_ALIGN)) + +enum mpls_ttl_propagation { + MPLS_TTL_PROP_DEFAULT, + MPLS_TTL_PROP_ENABLED, + MPLS_TTL_PROP_DISABLED, }; /* The route, nexthops and vias are stored together in the same memory @@ -98,16 +126,16 @@ struct mpls_nh { /* next hop label forwarding entry */ * +----------------------+ * | mpls_nh 0 | * +----------------------+ - * | ... | - * +----------------------+ - * | mpls_nh n-1 | - * +----------------------+ - * | alignment padding | + * | alignment padding | 4 bytes for odd number of labels * +----------------------+ * | via[rt_max_alen] 0 | * +----------------------+ + * | alignment padding | via's aligned on sizeof(unsigned long) + * +----------------------+ * | ... | * +----------------------+ + * | mpls_nh n-1 | + * +----------------------+ * | via[rt_max_alen] n-1 | * +----------------------+ */ @@ -116,22 +144,30 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_protocol; u8 rt_payload_type; u8 rt_max_alen; - unsigned int rt_nhn; - unsigned int rt_nhn_alive; + u8 rt_ttl_propagate; + u8 rt_nhn; + /* rt_nhn_alive is accessed under RCU in the packet path; it + * is modified handling netdev events with rtnl lock held + */ + u8 rt_nhn_alive; + u8 rt_nh_size; + u8 rt_via_offset; + u8 rt_reserved1; struct mpls_nh rt_nh[0]; }; #define for_nexthops(rt) { \ - int nhsel; struct mpls_nh *nh; \ - for (nhsel = 0, nh = (rt)->rt_nh; \ + int nhsel; struct mpls_nh *nh; u8 *__nh; \ + for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \ nhsel < (rt)->rt_nhn; \ - nh++, nhsel++) + __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++) #define change_nexthops(rt) { \ - int nhsel; struct mpls_nh *nh; \ - for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \ + int nhsel; struct mpls_nh *nh; u8 *__nh; \ + for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh), \ + __nh = (u8 *)((rt)->rt_nh); \ nhsel < (rt)->rt_nhn; \ - nh++, nhsel++) + __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++) #define endfor_nexthops(rt) } @@ -166,7 +202,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); -int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels, +int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, u32 label[]); int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, u8 via[]); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index e4e4424f9eb1..369c7a23c86c 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -29,6 +29,7 @@ static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 }, + [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 }, }; static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) @@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb) struct rtable *rt = NULL; struct rt6_info *rt6 = NULL; struct mpls_dev *out_mdev; + struct net *net; int err = 0; bool bos; int i; @@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb) /* Find the output device */ out_dev = dst->dev; - - /* Obtain the ttl */ - if (dst->ops->family == AF_INET) { - ttl = ip_hdr(skb)->ttl; - rt = (struct rtable *)dst; - } else if (dst->ops->family == AF_INET6) { - ttl = ipv6_hdr(skb)->hop_limit; - rt6 = (struct rt6_info *)dst; - } else { - goto drop; - } + net = dev_net(out_dev); skb_orphan(skb); @@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb) tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate); + /* Obtain the ttl using the following set of rules. + * + * LWT ttl propagation setting: + * - disabled => use default TTL value from LWT + * - enabled => use TTL value from IPv4/IPv6 header + * - default => + * Global ttl propagation setting: + * - disabled => use default TTL value from global setting + * - enabled => use TTL value from IPv4/IPv6 header + */ + if (dst->ops->family == AF_INET) { + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) + ttl = tun_encap_info->default_ttl; + else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && + !net->mpls.ip_ttl_propagate) + ttl = net->mpls.default_ttl; + else + ttl = ip_hdr(skb)->ttl; + rt = (struct rtable *)dst; + } else if (dst->ops->family == AF_INET6) { + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) + ttl = tun_encap_info->default_ttl; + else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && + !net->mpls.ip_ttl_propagate) + ttl = net->mpls.default_ttl; + else + ttl = ipv6_hdr(skb)->hop_limit; + rt6 = (struct rt6_info *)dst; + } else { + goto drop; + } + /* Verify the destination can hold the packet */ new_header_size = mpls_encap_size(tun_encap_info); mtu = mpls_dev_mtu(out_dev); @@ -140,10 +164,11 @@ static int mpls_build_state(struct nlattr *nla, struct mpls_iptunnel_encap *tun_encap_info; struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; struct lwtunnel_state *newts; + u8 n_labels; int ret; ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, - mpls_iptunnel_policy); + mpls_iptunnel_policy, NULL); if (ret < 0) return ret; @@ -151,15 +176,32 @@ static int mpls_build_state(struct nlattr *nla, return -EINVAL; - newts = lwtunnel_state_alloc(sizeof(*tun_encap_info)); + /* determine number of labels */ + if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], + MAX_NEW_LABELS, &n_labels, NULL)) + return -EINVAL; + + newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + + n_labels * sizeof(u32)); if (!newts) return -ENOMEM; tun_encap_info = mpls_lwtunnel_encap(newts); - ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, + ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels, &tun_encap_info->labels, tun_encap_info->label); if (ret) goto errout; + + tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT; + + if (tb[MPLS_IPTUNNEL_TTL]) { + tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]); + /* TTL 0 implies propagate from IP header */ + tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ? + MPLS_TTL_PROP_DISABLED : + MPLS_TTL_PROP_ENABLED; + } + newts->type = LWTUNNEL_ENCAP_MPLS; newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; newts->headroom = mpls_encap_size(tun_encap_info); @@ -186,6 +228,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb, tun_encap_info->label)) goto nla_put_failure; + if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT && + nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -195,10 +241,16 @@ nla_put_failure: static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate) { struct mpls_iptunnel_encap *tun_encap_info; + int nlsize; tun_encap_info = mpls_lwtunnel_encap(lwtstate); - return nla_total_size(tun_encap_info->labels * 4); + nlsize = nla_total_size(tun_encap_info->labels * 4); + + if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT) + nlsize += nla_total_size(1); + + return nlsize; } static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) @@ -207,10 +259,12 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b); int l; - if (a_hdr->labels != b_hdr->labels) + if (a_hdr->labels != b_hdr->labels || + a_hdr->ttl_propagate != b_hdr->ttl_propagate || + a_hdr->default_ttl != b_hdr->default_ttl) return 1; - for (l = 0; l < MAX_NEW_LABELS; l++) + for (l = 0; l < a_hdr->labels; l++) if (a_hdr->label[l] != b_hdr->label[l]) return 1; return 0; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c296f9b606d4..9bd5b6636181 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -295,7 +295,8 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -313,7 +314,8 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -906,7 +908,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], - set->type->create_policy)) { + set->type->create_policy, NULL)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1257,8 +1259,8 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) ip_set_id_t index; /* Second pass, so parser can't fail */ - nla_parse(cda, IPSET_ATTR_CMD_MAX, - attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); + nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, + ip_set_setname_policy, NULL); if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1305,7 +1307,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) * manually :-( */ if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(cb->skb, nlh, ret); + netlink_ack(cb->skb, nlh, ret, NULL); return ret; } } @@ -1501,9 +1503,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); cmdattr = (void *)&errmsg->msg + min_len; - nla_parse(cda, IPSET_ATTR_CMD_MAX, - cmdattr, nlh->nlmsg_len - min_len, - ip_set_adt_policy); + nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, + nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL); errline = nla_data(cda[IPSET_ATTR_LINENO]); @@ -1549,7 +1550,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (attr[IPSET_ATTR_DATA]) { if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1561,7 +1562,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1603,7 +1604,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (attr[IPSET_ATTR_DATA]) { if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1615,7 +1616,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1646,7 +1647,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index e6a2753dff9e..3d2ac71a83ec 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -181,7 +181,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) if (!(cp->flags & IP_VS_CONN_F_HASHED)) { cp->flags |= IP_VS_CONN_F_HASHED; - atomic_inc(&cp->refcnt); + refcount_inc(&cp->refcnt); hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { @@ -215,7 +215,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) if (cp->flags & IP_VS_CONN_F_HASHED) { hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; - atomic_dec(&cp->refcnt); + refcount_dec(&cp->refcnt); ret = 1; } else ret = 0; @@ -242,13 +242,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) if (cp->flags & IP_VS_CONN_F_HASHED) { ret = false; /* Decrease refcnt and unlink conn only if we are last user */ - if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) { + if (refcount_dec_if_one(&cp->refcnt)) { hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; ret = true; } } else - ret = atomic_read(&cp->refcnt) ? false : true; + ret = refcount_read(&cp->refcnt) ? false : true; spin_unlock(&cp->lock); ct_write_unlock_bh(hash); @@ -475,7 +475,7 @@ static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp) void ip_vs_conn_put(struct ip_vs_conn *cp) { if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && - (atomic_read(&cp->refcnt) == 1) && + (refcount_read(&cp->refcnt) == 1) && !timer_pending(&cp->timer)) /* expire connection immediately */ __ip_vs_conn_put_notimer(cp); @@ -617,8 +617,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + cp->flags, refcount_read(&cp->refcnt), + refcount_read(&dest->refcnt)); /* Update the connection counters */ if (!(flags & IP_VS_CONN_F_TEMPLATE)) { @@ -714,8 +714,8 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + cp->flags, refcount_read(&cp->refcnt), + refcount_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -863,10 +863,10 @@ static void ip_vs_conn_expire(unsigned long data) expire_later: IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n", - atomic_read(&cp->refcnt), + refcount_read(&cp->refcnt), atomic_read(&cp->n_control)); - atomic_inc(&cp->refcnt); + refcount_inc(&cp->refcnt); cp->timeout = 60*HZ; if (ipvs->sync_state & IP_VS_STATE_MASTER) @@ -941,7 +941,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, * it in the table, so that other thread run ip_vs_random_dropentry * but cannot drop this entry. */ - atomic_set(&cp->refcnt, 1); + refcount_set(&cp->refcnt, 1); cp->control = NULL; atomic_set(&cp->n_control, 0); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index db40050f8785..b4a746d0e39b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -542,7 +542,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), - cp->flags, atomic_read(&cp->refcnt)); + cp->flags, refcount_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); return cp; @@ -1193,7 +1193,7 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), - cp->flags, atomic_read(&cp->refcnt)); + cp->flags, refcount_read(&cp->refcnt)); LeaveFunction(12); return cp; } @@ -2231,8 +2231,6 @@ static int __net_init __ip_vs_init(struct net *net) if (ip_vs_sync_net_init(ipvs) < 0) goto sync_fail; - printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", - sizeof(struct netns_ipvs), ipvs->gen); return 0; /* * Error handling diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5aeb0dde6ccc..892da70866d6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -699,7 +699,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, dest->vfwmark, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + refcount_read(&dest->refcnt)); if (dest->af == dest_af && ip_vs_addr_equal(dest_af, &dest->addr, daddr) && dest->port == dport && @@ -934,7 +934,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atomic_set(&dest->activeconns, 0); atomic_set(&dest->inactconns, 0); atomic_set(&dest->persistconns, 0); - atomic_set(&dest->refcnt, 1); + refcount_set(&dest->refcnt, 1); INIT_HLIST_NODE(&dest->d_list); spin_lock_init(&dest->dst_lock); @@ -998,7 +998,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " "dest->refcnt=%d, service %u/%s:%u\n", IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport), - atomic_read(&dest->refcnt), + refcount_read(&dest->refcnt), dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); @@ -1074,7 +1074,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, spin_lock_bh(&ipvs->dest_trash_lock); IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + refcount_read(&dest->refcnt)); if (list_empty(&ipvs->dest_trash) && !cleanup) mod_timer(&ipvs->dest_trash_timer, jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); @@ -1157,7 +1157,7 @@ static void ip_vs_dest_trash_expire(unsigned long data) spin_lock(&ipvs->dest_trash_lock); list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { - if (atomic_read(&dest->refcnt) > 1) + if (refcount_read(&dest->refcnt) > 1) continue; if (dest->idle_start) { if (time_before(now, dest->idle_start + @@ -1545,7 +1545,7 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) dev->name, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + refcount_read(&dest->refcnt)); __ip_vs_dst_cache_reset(dest); } spin_unlock_bh(&dest->dst_lock); @@ -3089,7 +3089,8 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, /* Parse mandatory identifying service fields first */ if (nla == NULL || - nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) + nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, + ip_vs_svc_policy, NULL)) return -EINVAL; nla_af = attrs[IPVS_SVC_ATTR_AF]; @@ -3251,8 +3252,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); /* Try to find the service for which to dump destinations */ - if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, - IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) + if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, + ip_vs_cmd_policy, NULL)) goto out_err; @@ -3288,7 +3289,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, /* Parse mandatory identifying destination fields first */ if (nla == NULL || - nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) + nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, + ip_vs_dest_policy, NULL)) return -EINVAL; nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; @@ -3530,7 +3532,7 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], - ip_vs_daemon_policy)) + ip_vs_daemon_policy, info->extack)) goto out; if (cmd == IPVS_CMD_NEW_DAEMON) diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 5824927cf8e0..b6aa4a970c6e 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -448,7 +448,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 703f11877bee..c13ff575f9f7 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; } @@ -249,7 +249,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) __func__, IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port), atomic_read(&most->activeconns), - atomic_read(&most->refcnt), + refcount_read(&most->refcnt), atomic_read(&most->weight), moh); return most; } @@ -612,7 +612,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index a8b63401e773..7d9d4ac596ca 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -110,7 +110,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index d952d67f904d..56f8e4b204ff 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -447,7 +447,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ntohs(cp->cport), sctp_state_name(cp->state), sctp_state_name(next_state), - atomic_read(&cp->refcnt)); + refcount_read(&cp->refcnt)); if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && (next_state != IP_VS_SCTP_S_ESTABLISHED)) { diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 5117bcb7d2f0..12dc8d5bc37d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -557,7 +557,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ntohs(cp->cport), tcp_state_name(cp->state), tcp_state_name(new_state), - atomic_read(&cp->refcnt)); + refcount_read(&cp->refcnt)); if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 58bacfc461ee..ee0530d14c5f 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -97,7 +97,7 @@ stop: "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), atomic_read(&dest->weight)); + refcount_read(&dest->refcnt), atomic_read(&dest->weight)); return dest; } diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index f8e2d00f528b..ab23cf203437 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -111,7 +111,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 6b366fd90554..6add39e0ec20 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -83,7 +83,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 17e6d4406ca7..62258dd457ac 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -218,7 +218,7 @@ found: "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), + refcount_read(&dest->refcnt), atomic_read(&dest->weight)); mark->cl = dest; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 071b97fcbefb..3d621b8d7b8a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -181,7 +181,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; -DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used + * for the nfctinfo. We cheat by (ab)using the PER CPU cache line + * alignment to enforce this. + */ +DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); static unsigned int nf_conntrack_hash_rnd __read_mostly; @@ -1129,7 +1133,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ -static struct nf_conntrack_tuple_hash * +static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, @@ -1237,21 +1241,20 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; } -/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */ -static inline struct nf_conn * +/* On success, returns 0, sets skb->_nfct | ctinfo */ +static int resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_l4proto *l4proto, - int *set_reply, - enum ip_conntrack_info *ctinfo) + struct nf_conntrack_l4proto *l4proto) { const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; struct nf_conn *ct; u32 hash; @@ -1260,7 +1263,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { pr_debug("Can't get tuple\n"); - return NULL; + return 0; } /* look for tuple match */ @@ -1271,33 +1274,30 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, skb, dataoff, hash); if (!h) - return NULL; + return 0; if (IS_ERR(h)) - return (void *)h; + return PTR_ERR(h); } ct = nf_ct_tuplehash_to_ctrack(h); /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED_REPLY; - /* Please set reply bit if this packet OK */ - *set_reply = 1; + ctinfo = IP_CT_ESTABLISHED_REPLY; } else { /* Once we've had two way comms, always ESTABLISHED. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { pr_debug("normal packet for %p\n", ct); - *ctinfo = IP_CT_ESTABLISHED; + ctinfo = IP_CT_ESTABLISHED; } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { pr_debug("related packet for %p\n", ct); - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; } else { pr_debug("new packet for %p\n", ct); - *ctinfo = IP_CT_NEW; + ctinfo = IP_CT_NEW; } - *set_reply = 0; } - nf_ct_set(skb, ct, *ctinfo); - return ct; + nf_ct_set(skb, ct, ctinfo); + return 0; } unsigned int @@ -1311,7 +1311,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, unsigned int *timeouts; unsigned int dataoff; u_int8_t protonum; - int set_reply = 0; int ret; tmpl = nf_ct_get(skb, &ctinfo); @@ -1354,23 +1353,22 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } repeat: - ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, - l3proto, l4proto, &set_reply, &ctinfo); - if (!ct) { - /* Not valid part of a connection */ - NF_CT_STAT_INC_ATOMIC(net, invalid); - ret = NF_ACCEPT; - goto out; - } - - if (IS_ERR(ct)) { + ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, + l3proto, l4proto); + if (ret < 0) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(net, drop); ret = NF_DROP; goto out; } - NF_CT_ASSERT(skb_nfct(skb)); + ct = nf_ct_get(skb, &ctinfo); + if (!ct) { + /* Not valid part of a connection */ + NF_CT_STAT_INC_ATOMIC(net, invalid); + ret = NF_ACCEPT; + goto out; + } /* Decide what timeout policy we want to apply to this flow. */ timeouts = nf_ct_timeout_lookup(net, ct, l4proto); @@ -1395,7 +1393,8 @@ repeat: goto out; } - if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) + if (ctinfo == IP_CT_ESTABLISHED_REPLY && + !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); out: if (tmpl) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index da9df2d56e66..22fc32143e9c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4b2e1fb28bb4..a5ca5e426bae 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -57,7 +57,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, hlist_del_rcu(&exp->hnode); net->ct.expect_count--; - hlist_del(&exp->lnode); + hlist_del_rcu(&exp->lnode); master_help->expecting[exp->class]--; nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report); @@ -133,7 +133,7 @@ nf_ct_expect_find_get(struct net *net, rcu_read_lock(); i = __nf_ct_expect_find(net, zone, tuple); - if (i && !atomic_inc_not_zero(&i->use)) + if (i && !refcount_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -186,7 +186,7 @@ nf_ct_find_expectation(struct net *net, return NULL; if (exp->flags & NF_CT_EXPECT_PERMANENT) { - atomic_inc(&exp->use); + refcount_inc(&exp->use); return exp; } else if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -275,7 +275,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) return NULL; new->master = me; - atomic_set(&new->use, 1); + refcount_set(&new->use, 1); return new; } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); @@ -348,7 +348,7 @@ static void nf_ct_expect_free_rcu(struct rcu_head *head) void nf_ct_expect_put(struct nf_conntrack_expect *exp) { - if (atomic_dec_and_test(&exp->use)) + if (refcount_dec_and_test(&exp->use)) call_rcu(&exp->rcu, nf_ct_expect_free_rcu); } EXPORT_SYMBOL_GPL(nf_ct_expect_put); @@ -361,9 +361,9 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple); /* two references : one for hash insert, one for the timer */ - atomic_add(2, &exp->use); + refcount_add(2, &exp->use); - hlist_add_head(&exp->lnode, &master_help->expectations); + hlist_add_head_rcu(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 02bcf00c2492..008299b7f78f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } + off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len + var_alloc_len; alloc_size = t->alloc_size + var_alloc_len; @@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } newoff = ALIGN(old->len, t->align); newlen = newoff + t->len + var_alloc_len; @@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - rcu_barrier(); /* Wait for completion of call_rcu()'s */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 6dc44d9b4190..4eeb3418366a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -158,16 +158,25 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; + rcu_read_lock(); + h = __nf_conntrack_helper_find(name, l3num, protonum); #ifdef CONFIG_MODULES if (h == NULL) { - if (request_module("nfct-helper-%s", name) == 0) + rcu_read_unlock(); + if (request_module("nfct-helper-%s", name) == 0) { + rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); + } else { + return h; + } } #endif if (h != NULL && !try_module_get(h->me)) h = NULL; + rcu_read_unlock(); + return h; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); @@ -311,38 +320,36 @@ void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); +/* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_name(const char *name) { struct nf_ct_helper_expectfn *cur; bool found = false; - rcu_read_lock(); list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (!strcmp(cur->name, name)) { found = true; break; } } - rcu_read_unlock(); return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name); +/* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol) { struct nf_ct_helper_expectfn *cur; bool found = false; - rcu_read_lock(); list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (cur->expectfn == symbol) { found = true; break; } } - rcu_read_unlock(); return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6806b5e73567..aafd25dff8c0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -908,7 +908,7 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_l3proto *l3proto; int ret = 0; - ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL); if (ret < 0) return ret; @@ -917,7 +917,7 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr, if (likely(l3proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_IP_MAX, - l3proto->nla_policy); + l3proto->nla_policy, NULL); if (ret == 0) ret = l3proto->nlattr_to_tuple(tb, tuple); } @@ -938,7 +938,8 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nf_conntrack_l4proto *l4proto; int ret = 0; - ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy); + ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy, + NULL); if (ret < 0) return ret; @@ -951,7 +952,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, if (likely(l4proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_PROTO_MAX, - l4proto->nla_policy); + l4proto->nla_policy, NULL); if (ret == 0) ret = l4proto->nlattr_to_tuple(tb, tuple); } @@ -1015,7 +1016,8 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy, + NULL); if (err < 0) return err; @@ -1065,7 +1067,7 @@ static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, int err; struct nlattr *tb[CTA_HELP_MAX+1]; - err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy, NULL); if (err < 0) return err; @@ -1488,11 +1490,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct, * treat the second attempt as a no-op instead of returning * an error. */ - if (help && help->helper && - !strcmp(help->helper->name, helpname)) - return 0; - else - return -EBUSY; + err = -EBUSY; + if (help) { + rcu_read_lock(); + helper = rcu_dereference(help->helper); + if (helper && !strcmp(helper->name, helpname)) + err = 0; + rcu_read_unlock(); + } + + return err; } if (!strcmp(helpname, "")) { @@ -1566,7 +1573,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct, struct nf_conntrack_l4proto *l4proto; int err = 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy, + NULL); if (err < 0) return err; @@ -1591,7 +1599,7 @@ static int change_seq_adj(struct nf_ct_seqadj *seq, int err; struct nlattr *cda[CTA_SEQADJ_MAX+1]; - err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy); + err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy, NULL); if (err < 0) return err; @@ -1929,9 +1937,9 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, err = 0; if (test_bit(IPS_EXPECTED_BIT, &ct->status)) - events = IPCT_RELATED; + events = 1 << IPCT_RELATED; else - events = IPCT_NEW; + events = 1 << IPCT_NEW; if (cda[CTA_LABELS] && ctnetlink_attach_labels(ct, cda) == 0) @@ -2348,7 +2356,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct) struct nlattr *cda[CTA_MAX+1]; int ret; - ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy, NULL); if (ret < 0) return ret; @@ -2385,7 +2393,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, struct nf_conntrack_expect *exp; int err; - err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy); + err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy, + NULL); if (err < 0) return err; @@ -2675,8 +2684,8 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]], - hnode) { + hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]], + hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -2693,7 +2702,7 @@ restart: cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!atomic_inc_not_zero(&exp->use)) + if (!refcount_inc_not_zero(&exp->use)) continue; cb->args[1] = (unsigned long)exp; goto out; @@ -2727,7 +2736,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conntrack_expect *)cb->args[1]; restart: - hlist_for_each_entry(exp, &help->expectations, lnode) { + hlist_for_each_entry_rcu(exp, &help->expectations, lnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; if (cb->args[1]) { @@ -2739,7 +2748,7 @@ restart: cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!atomic_inc_not_zero(&exp->use)) + if (!refcount_inc_not_zero(&exp->use)) continue; cb->args[1] = (unsigned long)exp; goto out; @@ -2789,6 +2798,12 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); + /* No expectation linked to this connection tracking. */ + if (!nfct_help(ct)) { + nf_ct_put(ct); + return 0; + } + c.data = ct; err = netlink_dump_start(ctnl, skb, nlh, &c); @@ -3004,7 +3019,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_tuple nat_tuple = {}; int err; - err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, + exp_nat_nla_policy, NULL); if (err < 0) return err; @@ -3133,23 +3149,27 @@ ctnetlink_create_expect(struct net *net, return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); + rcu_read_lock(); if (cda[CTA_EXPECT_HELP_NAME]) { const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); helper = __nf_conntrack_helper_find(helpname, u3, nf_ct_protonum(ct)); if (helper == NULL) { + rcu_read_unlock(); #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { err = -EOPNOTSUPP; goto err_ct; } + rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, u3, nf_ct_protonum(ct)); if (helper) { err = -EAGAIN; - goto err_ct; + goto err_rcu; } + rcu_read_unlock(); #endif err = -EOPNOTSUPP; goto err_ct; @@ -3159,11 +3179,13 @@ ctnetlink_create_expect(struct net *net, exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); - goto err_ct; + goto err_rcu; } err = nf_ct_expect_related_report(exp, portid, report); nf_ct_expect_put(exp); +err_rcu: + rcu_read_unlock(); err_ct: nf_ct_put(ct); return err; @@ -3442,6 +3464,7 @@ static void __exit ctnetlink_exit(void) #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif + synchronize_rcu(); } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 93dd1c5b7bff..b2e02dfe7fa8 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -665,7 +665,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return 0; err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr, - dccp_nla_policy); + dccp_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 33279aab583d..2a7300587c87 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -584,10 +584,8 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) if (!attr) return 0; - err = nla_parse_nested(tb, - CTA_PROTOINFO_SCTP_MAX, - attr, - sctp_nla_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr, + sctp_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b122e9dacfed..85bde77ad967 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1234,7 +1234,8 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (!pattr) return 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, + tcp_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 94b14c5a8b17..908ba5abbc0b 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -751,7 +751,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_nat_l4proto *l4proto; int err; - err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, + protonat_nla_policy, NULL); if (err < 0) return err; @@ -780,7 +781,7 @@ nfnetlink_parse_nat(const struct nlattr *nat, memset(range, 0, sizeof(*range)); - err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy, NULL); if (err < 0) return err; @@ -903,6 +904,8 @@ static void __exit nf_nat_cleanup(void) #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif + synchronize_rcu(); + for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 31d358691af0..804e8a0ab36e 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -33,8 +33,16 @@ sctp_manip_pkt(struct sk_buff *skb, enum nf_nat_manip_type maniptype) { sctp_sctphdr_t *hdr; + int hdrsize = 8; - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + /* This could be an inner header returned in imcp packet; in such + * cases we cannot update the checksum field since it is outside + * of the 8 bytes of transport layer headers we are guaranteed. + */ + if (skb->len >= hdroff + sizeof(*hdr)) + hdrsize = sizeof(*hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) return false; hdr = (struct sctphdr *)(skb->data + hdroff); @@ -47,6 +55,9 @@ sctp_manip_pkt(struct sk_buff *skb, hdr->dest = tuple->dst.u.sctp.port; } + if (hdrsize < sizeof(*hdr)) + return true; + if (skb->ip_summed != CHECKSUM_PARTIAL) { hdr->checksum = sctp_compute_cksum(skb, hdroff); skb->ip_summed = CHECKSUM_NONE; diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index d43869879fcf..86067560a318 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -101,11 +101,13 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, rcu_read_lock(); idev = __in6_dev_get(skb->dev); if (idev != NULL) { + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { newdst = ifa->addr; addr = true; break; } + read_unlock_bh(&idev->lock); } rcu_read_unlock(); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5e0ccfd5bb37..907431318637 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1182,7 +1182,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) struct nft_stats *stats; int err; - err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); + err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy, + NULL); if (err < 0) return ERR_PTR(err); @@ -1257,7 +1258,7 @@ static int nft_chain_parse_hook(struct net *net, int err; err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], - nft_hook_policy); + nft_hook_policy, NULL); if (err < 0) return err; @@ -1724,7 +1725,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_EXPR_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL); if (err < 0) return err; @@ -1734,7 +1735,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, if (tb[NFTA_EXPR_DATA]) { err = nla_parse_nested(info->tb, type->maxattr, - tb[NFTA_EXPR_DATA], type->policy); + tb[NFTA_EXPR_DATA], type->policy, NULL); if (err < 0) goto err1; } else @@ -1772,8 +1773,19 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, goto err1; } + if (ops->validate) { + const struct nft_data *data = NULL; + + err = ops->validate(ctx, expr, &data); + if (err < 0) + goto err2; + } + return 0; +err2: + if (ops->destroy) + ops->destroy(ctx, expr); err1: expr->ops = NULL; return err; @@ -2523,8 +2535,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, return 0; } -struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla, u8 genmask) +static struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) { struct nft_set *set; @@ -2538,11 +2550,10 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, } return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL_GPL(nf_tables_set_lookup); -struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla, - u8 genmask) +static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, + const struct nlattr *nla, + u8 genmask) { struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); @@ -2557,7 +2568,25 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net, } return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid); + +struct nft_set *nft_set_lookup(const struct net *net, + const struct nft_table *table, + const struct nlattr *nla_set_name, + const struct nlattr *nla_set_id, + u8 genmask) +{ + struct nft_set *set; + + set = nf_tables_set_lookup(table, nla_set_name, genmask); + if (IS_ERR(set)) { + if (!nla_set_id) + return set; + + set = nf_tables_set_lookup_byid(net, nla_set_id, genmask); + } + return set; +} +EXPORT_SYMBOL_GPL(nft_set_lookup); static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) @@ -2851,7 +2880,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, struct nlattr *da[NFTA_SET_DESC_MAX + 1]; int err; - err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy); + err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, + nft_set_desc_policy, NULL); if (err < 0) return err; @@ -3145,7 +3175,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -3354,7 +3383,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) int event, err; err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, - NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy); + NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy, + NULL); if (err < 0) return err; @@ -3399,7 +3429,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - args.iter.flush = false; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -3614,7 +3643,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy); + nft_set_elem_policy, NULL); if (err < 0) return err; @@ -3844,7 +3873,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy); + nft_set_elem_policy, NULL); if (err < 0) goto err1; @@ -3963,7 +3992,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_set_iter iter = { .genmask = genmask, .fn = nft_flush_set, - .flush = true, }; set->ops->walk(&ctx, set, &iter); @@ -4067,7 +4095,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, }; -static struct nft_object *nft_obj_init(const struct nft_object_type *type, +static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, + const struct nft_object_type *type, const struct nlattr *attr) { struct nlattr *tb[type->maxattr + 1]; @@ -4075,7 +4104,8 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type, int err; if (attr) { - err = nla_parse_nested(tb, type->maxattr, attr, type->policy); + err = nla_parse_nested(tb, type->maxattr, attr, type->policy, + NULL); if (err < 0) goto err1; } else { @@ -4087,7 +4117,7 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type, if (obj == NULL) goto err1; - err = type->init((const struct nlattr * const *)tb, obj); + err = type->init(ctx, (const struct nlattr * const *)tb, obj); if (err < 0) goto err2; @@ -4195,7 +4225,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, if (IS_ERR(type)) return PTR_ERR(type); - obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]); + obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err1; @@ -5114,7 +5144,6 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, iter.count = 0; iter.err = 0; iter.fn = nf_tables_loop_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -5289,7 +5318,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_chain *chain; int err; - err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy); + err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy, + NULL); if (err < 0) return err; @@ -5419,7 +5449,7 @@ int nft_data_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_DATA_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy); + err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 68eda920160e..e42f858b91d2 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -148,7 +148,8 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ -static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; @@ -191,8 +192,8 @@ replay: int attrlen = nlh->nlmsg_len - min_len; __u8 subsys_id = NFNL_SUBSYS_ID(type); - err = nla_parse(cda, ss->cb[cb_id].attr_count, - attr, attrlen, ss->cb[cb_id].policy); + err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, + ss->cb[cb_id].policy, extack); if (err < 0) { rcu_read_unlock(); return err; @@ -261,7 +262,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) struct nfnl_err *nfnl_err, *next; list_for_each_entry_safe(nfnl_err, next, err_list, head) { - netlink_ack(skb, nfnl_err->nlh, nfnl_err->err); + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL); nfnl_err_del(nfnl_err); } } @@ -284,13 +285,13 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, int err; if (subsys_id >= NFNL_SUBSYS_COUNT) - return netlink_ack(skb, nlh, -EINVAL); + return netlink_ack(skb, nlh, -EINVAL, NULL); replay: status = 0; skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) - return netlink_ack(oskb, nlh, -ENOMEM); + return netlink_ack(oskb, nlh, -ENOMEM, NULL); nfnl_lock(subsys_id); ss = nfnl_dereference_protected(subsys_id); @@ -304,20 +305,20 @@ replay: #endif { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -EOPNOTSUPP); + netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); return kfree_skb(skb); } } if (!ss->commit || !ss->abort) { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -EOPNOTSUPP); + netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); return kfree_skb(skb); } if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -ERESTART); + netlink_ack(oskb, nlh, -ERESTART, NULL); return kfree_skb(skb); } @@ -376,8 +377,8 @@ replay: struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; - err = nla_parse(cda, ss->cb[cb_id].attr_count, - attr, attrlen, ss->cb[cb_id].policy); + err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, + attrlen, ss->cb[cb_id].policy, NULL); if (err < 0) goto ack; @@ -407,7 +408,8 @@ ack: * pointing to the batch header. */ nfnl_err_reset(&err_list); - netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM); + netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM, + NULL); status |= NFNL_BATCH_FAILURE; goto done; } @@ -465,9 +467,10 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) return; - err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy); + err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy, + NULL); if (err < 0) { - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, NULL); return; } if (cda[NFNL_BATCH_GENID]) @@ -493,7 +496,7 @@ static void nfnetlink_rcv(struct sk_buff *skb) return; if (!netlink_net_capable(skb, CAP_NET_ADMIN)) { - netlink_ack(skb, nlh, -EPERM); + netlink_ack(skb, nlh, -EPERM, NULL); return; } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index d44d89b56127..2837d5fb98bd 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/slab.h> @@ -32,7 +33,7 @@ struct nf_acct { atomic64_t bytes; unsigned long flags; struct list_head head; - atomic_t refcnt; + refcount_t refcnt; char name[NFACCT_NAME_MAX]; struct rcu_head rcu_head; char data[0]; @@ -123,7 +124,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, atomic64_set(&nfacct->pkts, be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } - atomic_set(&nfacct->refcnt, 1); + refcount_set(&nfacct->refcnt, 1); list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list); return 0; } @@ -166,7 +167,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, NFACCT_PAD) || nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes), NFACCT_PAD) || - nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) + nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt)))) goto nla_put_failure; if (acct->flags & NFACCT_F_QUOTA) { u64 *quota = (u64 *)acct->data; @@ -243,7 +244,8 @@ nfacct_filter_alloc(const struct nlattr * const attr) struct nlattr *tb[NFACCT_FILTER_MAX + 1]; int err; - err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy); + err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy, + NULL); if (err < 0) return ERR_PTR(err); @@ -329,7 +331,7 @@ static int nfnl_acct_try_del(struct nf_acct *cur) /* We want to avoid races with nfnl_acct_put. So only when the current * refcnt is 1, we decrease it to 0. */ - if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) { + if (refcount_dec_if_one(&cur->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&cur->head); kfree_rcu(cur, rcu_head); @@ -413,7 +415,7 @@ struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name) if (!try_module_get(THIS_MODULE)) goto err; - if (!atomic_inc_not_zero(&cur->refcnt)) { + if (!refcount_inc_not_zero(&cur->refcnt)) { module_put(THIS_MODULE); goto err; } @@ -429,7 +431,7 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get); void nfnl_acct_put(struct nf_acct *acct) { - if (atomic_dec_and_test(&acct->refcnt)) + if (refcount_dec_and_test(&acct->refcnt)) kfree_rcu(acct, rcu_head); module_put(THIS_MODULE); @@ -502,7 +504,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net) list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) { list_del_rcu(&cur->head); - if (atomic_dec_and_test(&cur->refcnt)) + if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); } } diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de8782345c86..5b6c68311566 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -32,6 +32,13 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); +struct nfnl_cthelper { + struct list_head list; + struct nf_conntrack_helper helper; +}; + +static LIST_HEAD(nfnl_cthelper_list); + static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -70,7 +77,8 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; - err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, + nfnl_cthelper_tuple_pol, NULL); if (err < 0) return err; @@ -130,7 +138,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, int err; struct nlattr *tb[NFCTH_POLICY_MAX+1]; - err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; @@ -161,28 +170,28 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + unsigned int class_max; ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set); + nfnl_cthelper_expect_policy_set, NULL); if (ret < 0) return ret; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; - helper->expect_class_max = - ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); - - if (helper->expect_class_max != 0 && - helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (class_max == 0) + return -EINVAL; + if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * - helper->expect_class_max, GFP_KERNEL); + class_max, GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; - for (i=0; i<helper->expect_class_max; i++) { + for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; @@ -191,6 +200,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (ret < 0) goto err; } + + helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: @@ -203,18 +214,20 @@ nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; + struct nfnl_cthelper *nfcth; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; - helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); - if (helper == NULL) + nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); + if (nfcth == NULL) return -ENOMEM; + helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) - goto err; + goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); @@ -245,15 +258,101 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nf_conntrack_helper_register(helper); if (ret < 0) - goto err; + goto err2; + list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; -err: - kfree(helper); +err2: + kfree(helper->expect_policy); +err1: + kfree(nfcth); return ret; } static int +nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, + struct nf_conntrack_expect_policy *new_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol, NULL); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) + return -EBUSY; + + new_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + new_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], + struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1]; + struct nf_conntrack_expect_policy *policy; + int i, err; + + /* Check first that all policy attributes are well-formed, so we don't + * leave things in inconsistent state on errors. + */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + + if (!tb[NFCTH_POLICY_SET + i]) + return -EINVAL; + + err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], + &new_policy[i], + tb[NFCTH_POLICY_SET + i]); + if (err < 0) + return err; + } + /* Now we can safely update them. */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + policy = (struct nf_conntrack_expect_policy *) + &helper->expect_policy[i]; + policy->max_expected = new_policy->max_expected; + policy->timeout = new_policy->timeout; + } + + return 0; +} + +static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; + unsigned int class_max; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set, NULL); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (helper->expect_class_max + 1 != class_max) + return -EBUSY; + + return nfnl_cthelper_update_policy_all(tb, helper); +} + +static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) { @@ -263,8 +362,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return -EBUSY; if (tb[NFCTH_POLICY]) { - ret = nfnl_cthelper_parse_expect_policy(helper, - tb[NFCTH_POLICY]); + ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } @@ -293,7 +391,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - int ret = 0, i; + struct nfnl_cthelper *nlcth; + int ret = 0; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -304,31 +403,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, if (ret < 0) return ret; - rcu_read_lock(); - for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) - continue; + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if ((tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_EXCL) { - ret = -EEXIST; - goto err; - } - helper = cur; - break; - } + helper = cur; + break; } - rcu_read_unlock(); if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); @@ -336,9 +426,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, ret = nfnl_cthelper_update(tb, helper); return ret; -err: - rcu_read_unlock(); - return ret; } static int @@ -377,10 +464,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, - htonl(helper->expect_class_max))) + htonl(helper->expect_class_max + 1))) goto nla_put_failure; - for (i=0; i<helper->expect_class_max; i++) { + for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET+i) | NLA_F_NESTED); if (nest_parms2 == NULL) @@ -502,11 +589,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { - int ret = -ENOENT, i; + int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; + struct nfnl_cthelper *nlcth; bool tuple_set = false; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -527,45 +615,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; - - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { - ret = -ENOMEM; - break; - } + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), - NFNL_MSG_CTHELPER_NEW, cur); - if (ret <= 0) { - kfree_skb(skb2); - break; - } + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, - MSG_DONTWAIT); - if (ret > 0) - ret = 0; + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; - /* this avoids a loop in nfnetlink. */ - return ret == -EAGAIN ? -ENOBUFS : ret; - } + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; } return ret; } @@ -576,10 +658,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; - int i, j = 0, ret; + struct nfnl_cthelper *nlcth, *n; + int j = 0, ret; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); @@ -592,28 +674,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + j++; - j++; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - found = true; - nf_conntrack_helper_unregister(cur); - } + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + + list_del(&nlcth->list); + kfree(nlcth); } + /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 0 : -ENOENT; } @@ -662,20 +743,16 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *tmp; - int i; + struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); - for (i=0; i<nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - nf_conntrack_helper_unregister(cur); - } + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(nlcth); } } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 139e0867e56e..0a3510e7e396 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -56,7 +56,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy); + attr, l4proto->ctnl_timeout.nla_policy, + NULL); if (ret < 0) return ret; @@ -138,7 +139,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); timeout->l3num = l3num; timeout->l4proto = l4proto; - atomic_set(&timeout->refcnt, 1); + refcount_set(&timeout->refcnt, 1); list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); return 0; @@ -172,7 +173,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) || nla_put_be32(skb, CTA_TIMEOUT_USE, - htonl(atomic_read(&timeout->refcnt)))) + htonl(refcount_read(&timeout->refcnt)))) goto nla_put_failure; if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { @@ -339,7 +340,7 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) /* We want to avoid races with ctnl_timeout_put. So only when the * current refcnt is 1, we decrease it to 0. */ - if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) { + if (refcount_dec_if_one(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); @@ -536,7 +537,7 @@ ctnl_timeout_find_get(struct net *net, const char *name) if (!try_module_get(THIS_MODULE)) goto err; - if (!atomic_inc_not_zero(&timeout->refcnt)) { + if (!refcount_inc_not_zero(&timeout->refcnt)) { module_put(THIS_MODULE); goto err; } @@ -550,7 +551,7 @@ err: static void ctnl_timeout_put(struct ctnl_timeout *timeout) { - if (atomic_dec_and_test(&timeout->refcnt)) + if (refcount_dec_and_test(&timeout->refcnt)) kfree_rcu(timeout, rcu_head); module_put(THIS_MODULE); @@ -601,7 +602,7 @@ static void __net_exit cttimeout_net_exit(struct net *net) list_del_rcu(&cur->head); nf_ct_l4proto_put(cur->l4proto); - if (atomic_dec_and_test(&cur->refcnt)) + if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); } } @@ -646,8 +647,8 @@ static void __exit cttimeout_exit(void) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + synchronize_rcu(); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - rcu_barrier(); } module_init(cttimeout_init); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 08247bf7d7b8..ecd857b75ffe 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -40,6 +40,8 @@ #include <net/netfilter/nfnetlink_log.h> #include <linux/atomic.h> +#include <linux/refcount.h> + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include "../bridge/br_private.h" @@ -57,7 +59,7 @@ struct nfulnl_instance { struct hlist_node hlist; /* global list of instances */ spinlock_t lock; - atomic_t use; /* use count */ + refcount_t use; /* use count */ unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ @@ -115,7 +117,7 @@ __instance_lookup(struct nfnl_log_net *log, u_int16_t group_num) static inline void instance_get(struct nfulnl_instance *inst) { - atomic_inc(&inst->use); + refcount_inc(&inst->use); } static struct nfulnl_instance * @@ -125,7 +127,7 @@ instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num) rcu_read_lock_bh(); inst = __instance_lookup(log, group_num); - if (inst && !atomic_inc_not_zero(&inst->use)) + if (inst && !refcount_inc_not_zero(&inst->use)) inst = NULL; rcu_read_unlock_bh(); @@ -145,7 +147,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head) static void instance_put(struct nfulnl_instance *inst) { - if (inst && atomic_dec_and_test(&inst->use)) + if (inst && refcount_dec_and_test(&inst->use)) call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); } @@ -180,7 +182,7 @@ instance_create(struct net *net, u_int16_t group_num, INIT_HLIST_NODE(&inst->hlist); spin_lock_init(&inst->lock); /* needs to be two, since we _put() after creation */ - atomic_set(&inst->use, 2); + refcount_set(&inst->use, 2); setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); @@ -1031,7 +1033,7 @@ static int seq_show(struct seq_file *s, void *v) inst->group_num, inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, - inst->flushtimeout, atomic_read(&inst->use)); + inst->flushtimeout, refcount_read(&inst->use)); return 0; } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3ee0b8a000a4..3be6fef30581 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); - return NULL; + goto nlmsg_failure; } nlh = nlmsg_put(skb, 0, 0, @@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); - return NULL; + goto nlmsg_failure; } nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = entry->state.pf; @@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; + if (seclen) + security_release_secctx(secdata, seclen); return skb; nla_put_failure: skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); +nlmsg_failure: + if (seclen) + security_release_secctx(secdata, seclen); return NULL; } @@ -1104,7 +1109,7 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, int err; err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN], - nfqa_vlan_policy); + nfqa_vlan_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index c21e7eb8dce0..d76d0f36799f 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -200,7 +200,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) int err; err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr, - nft_rule_compat_policy); + nft_rule_compat_policy, NULL); if (err < 0) return err; @@ -230,10 +230,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; - ret = nft_compat_chain_validate_dependency(target->table, ctx->chain); - if (ret < 0) - goto err; - target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -419,10 +415,6 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; - ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); - if (ret < 0) - goto err; - match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 7f8422213341..67a710ebde09 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -82,7 +82,8 @@ static int nft_counter_do_init(const struct nlattr * const tb[], return 0; } -static int nft_counter_obj_init(const struct nlattr * const tb[], +static int nft_counter_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], struct nft_object *obj) { struct nft_counter_percpu_priv *priv = nft_obj_data(obj); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bf548a7a71ec..640fe5a5865e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -32,6 +32,12 @@ struct nft_ct { }; }; +struct nft_ct_helper_obj { + struct nf_conntrack_helper *helper4; + struct nf_conntrack_helper *helper6; + u8 l4proto; +}; + #ifdef CONFIG_NF_CONNTRACK_ZONES static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template); static unsigned int nft_ct_pcpu_template_refcnt __read_mostly; @@ -83,7 +89,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_DIRECTION: - *dest = CTINFO2DIR(ctinfo); + nft_reg_store8(dest, CTINFO2DIR(ctinfo)); return; case NFT_CT_STATUS: *dest = ct->status; @@ -151,20 +157,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; } case NFT_CT_L3PROTOCOL: - *dest = nf_ct_l3num(ct); + nft_reg_store8(dest, nf_ct_l3num(ct)); return; case NFT_CT_PROTOCOL: - *dest = nf_ct_protonum(ct); + nft_reg_store8(dest, nf_ct_protonum(ct)); return; #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: { const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + u16 zoneid; if (priv->dir < IP_CT_DIR_MAX) - *dest = nf_ct_zone_id(zone, priv->dir); + zoneid = nf_ct_zone_id(zone, priv->dir); else - *dest = zone->id; + zoneid = zone->id; + nft_reg_store16(dest, zoneid); return; } #endif @@ -183,10 +191,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_PROTO_SRC: - *dest = (__force __u16)tuple->src.u.all; + nft_reg_store16(dest, (__force u16)tuple->src.u.all); return; case NFT_CT_PROTO_DST: - *dest = (__force __u16)tuple->dst.u.all; + nft_reg_store16(dest, (__force u16)tuple->dst.u.all); return; default: break; @@ -205,7 +213,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, const struct nft_ct *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; enum ip_conntrack_info ctinfo; - u16 value = regs->data[priv->sreg]; + u16 value = nft_reg_load16(®s->data[priv->sreg]); struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); @@ -542,7 +550,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, case IP_CT_DIR_REPLY: break; default: - return -EINVAL; + err = -EINVAL; + goto err1; } } @@ -730,6 +739,162 @@ static struct nft_expr_type nft_notrack_type __read_mostly = { .owner = THIS_MODULE, }; +static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_ct_helper_obj *priv = nft_obj_data(obj); + struct nf_conntrack_helper *help4, *help6; + char name[NF_CT_HELPER_NAME_LEN]; + int family = ctx->afi->family; + + if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO]) + return -EINVAL; + + priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]); + if (!priv->l4proto) + return -ENOENT; + + nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name)); + + if (tb[NFTA_CT_HELPER_L3PROTO]) + family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO])); + + help4 = NULL; + help6 = NULL; + + switch (family) { + case NFPROTO_IPV4: + if (ctx->afi->family == NFPROTO_IPV6) + return -EINVAL; + + help4 = nf_conntrack_helper_try_module_get(name, family, + priv->l4proto); + break; + case NFPROTO_IPV6: + if (ctx->afi->family == NFPROTO_IPV4) + return -EINVAL; + + help6 = nf_conntrack_helper_try_module_get(name, family, + priv->l4proto); + break; + case NFPROTO_NETDEV: /* fallthrough */ + case NFPROTO_BRIDGE: /* same */ + case NFPROTO_INET: + help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4, + priv->l4proto); + help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6, + priv->l4proto); + break; + default: + return -EAFNOSUPPORT; + } + + /* && is intentional; only error if INET found neither ipv4 or ipv6 */ + if (!help4 && !help6) + return -ENOENT; + + priv->helper4 = help4; + priv->helper6 = help6; + + return 0; +} + +static void nft_ct_helper_obj_destroy(struct nft_object *obj) +{ + struct nft_ct_helper_obj *priv = nft_obj_data(obj); + + if (priv->helper4) + module_put(priv->helper4->me); + if (priv->helper6) + module_put(priv->helper6->me); +} + +static void nft_ct_helper_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_ct_helper_obj *priv = nft_obj_data(obj); + struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb); + struct nf_conntrack_helper *to_assign = NULL; + struct nf_conn_help *help; + + if (!ct || + nf_ct_is_confirmed(ct) || + nf_ct_is_template(ct) || + priv->l4proto != nf_ct_protonum(ct)) + return; + + switch (nf_ct_l3num(ct)) { + case NFPROTO_IPV4: + to_assign = priv->helper4; + break; + case NFPROTO_IPV6: + to_assign = priv->helper6; + break; + default: + WARN_ON_ONCE(1); + return; + } + + if (!to_assign) + return; + + if (test_bit(IPS_HELPER_BIT, &ct->status)) + return; + + help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC); + if (help) { + rcu_assign_pointer(help->helper, to_assign); + set_bit(IPS_HELPER_BIT, &ct->status); + } +} + +static int nft_ct_helper_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + const struct nft_ct_helper_obj *priv = nft_obj_data(obj); + const struct nf_conntrack_helper *helper = priv->helper4; + u16 family; + + if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name)) + return -1; + + if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto)) + return -1; + + if (priv->helper4 && priv->helper6) + family = NFPROTO_INET; + else if (priv->helper6) + family = NFPROTO_IPV6; + else + family = NFPROTO_IPV4; + + if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family))) + return -1; + + return 0; +} + +static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = { + [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING, + .len = NF_CT_HELPER_NAME_LEN - 1 }, + [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 }, + [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 }, +}; + +static struct nft_object_type nft_ct_helper_obj __read_mostly = { + .type = NFT_OBJECT_CT_HELPER, + .size = sizeof(struct nft_ct_helper_obj), + .maxattr = NFTA_CT_HELPER_MAX, + .policy = nft_ct_helper_policy, + .eval = nft_ct_helper_obj_eval, + .init = nft_ct_helper_obj_init, + .destroy = nft_ct_helper_obj_destroy, + .dump = nft_ct_helper_obj_dump, + .owner = THIS_MODULE, +}; + static int __init nft_ct_module_init(void) { int err; @@ -744,7 +909,14 @@ static int __init nft_ct_module_init(void) if (err < 0) goto err1; + err = nft_register_obj(&nft_ct_helper_obj); + if (err < 0) + goto err2; + return 0; + +err2: + nft_unregister_expr(&nft_notrack_type); err1: nft_unregister_expr(&nft_ct_type); return err; @@ -752,6 +924,7 @@ err1: static void __exit nft_ct_module_exit(void) { + nft_unregister_obj(&nft_ct_helper_obj); nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); } @@ -763,3 +936,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("ct"); MODULE_ALIAS_NFT_EXPR("notrack"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 049ad2d9ee66..3948da380259 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -133,16 +133,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->invert = true; } - set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], - genmask); - if (IS_ERR(set)) { - if (tb[NFTA_DYNSET_SET_ID]) - set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_DYNSET_SET_ID], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); - } + set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME], + tb[NFTA_DYNSET_SET_ID], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); if (set->ops->update == NULL) return -EOPNOTSUPP; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index c308920b194c..d212a85d2f33 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -98,14 +98,21 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, goto err; offset = i + priv->offset; - dest[priv->len / NFT_REG32_SIZE] = 0; - memcpy(dest, opt + offset, priv->len); + if (priv->flags & NFT_EXTHDR_F_PRESENT) { + *dest = 1; + } else { + dest[priv->len / NFT_REG32_SIZE] = 0; + memcpy(dest, opt + offset, priv->len); + } return; } err: - regs->verdict.code = NFT_BREAK; + if (priv->flags & NFT_EXTHDR_F_PRESENT) + *dest = 0; + else + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 29a4906adc27..21df8cccea65 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -24,7 +24,8 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { EXPORT_SYMBOL(nft_fib_policy); #define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ - NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF) + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \ + NFTA_FIB_F_PRESENT) int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -112,7 +113,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - return nft_fib_validate(ctx, expr, NULL); + return 0; } EXPORT_SYMBOL_GPL(nft_fib_init); @@ -133,19 +134,22 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) } EXPORT_SYMBOL_GPL(nft_fib_dump); -void nft_fib_store_result(void *reg, enum nft_fib_result r, +void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct nft_pktinfo *pkt, int index) { struct net_device *dev; u32 *dreg = reg; - switch (r) { + switch (priv->result) { case NFT_FIB_RESULT_OIF: - *dreg = index; + *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; break; case NFT_FIB_RESULT_OIFNAME: dev = dev_get_by_index_rcu(nft_net(pkt), index); - strncpy(reg, dev ? dev->name : "", IFNAMSIZ); + if (priv->flags & NFTA_FIB_F_PRESENT) + *dreg = !!dev; + else + strncpy(reg, dev ? dev->name : "", IFNAMSIZ); break; default: WARN_ON_ONCE(1); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index eb2721af898d..52a5079a91a3 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -17,20 +17,21 @@ #include <net/netfilter/nf_tables_core.h> #include <linux/jhash.h> -struct nft_hash { +struct nft_jhash { enum nft_registers sreg:8; enum nft_registers dreg:8; u8 len; + bool autogen_seed:1; u32 modulus; u32 seed; u32 offset; }; -static void nft_hash_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_jhash_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_hash *priv = nft_expr_priv(expr); + struct nft_jhash *priv = nft_expr_priv(expr); const void *data = ®s->data[priv->sreg]; u32 h; @@ -38,6 +39,25 @@ static void nft_hash_eval(const struct nft_expr *expr, regs->data[priv->dreg] = h + priv->offset; } +struct nft_symhash { + enum nft_registers dreg:8; + u32 modulus; + u32 offset; +}; + +static void nft_symhash_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_symhash *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 h; + + h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus); + + regs->data[priv->dreg] = h + priv->offset; +} + static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, @@ -45,13 +65,14 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, + [NFTA_HASH_TYPE] = { .type = NLA_U32 }, }; -static int nft_hash_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_jhash_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_expr_priv(expr); + struct nft_jhash *priv = nft_expr_priv(expr); u32 len; int err; @@ -82,20 +103,48 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - if (tb[NFTA_HASH_SEED]) + if (tb[NFTA_HASH_SEED]) { priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); - else + } else { + priv->autogen_seed = true; get_random_bytes(&priv->seed, sizeof(priv->seed)); + } return nft_validate_register_load(priv->sreg, len) && nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_hash_dump(struct sk_buff *skb, - const struct nft_expr *expr) +static int nft_symhash_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { - const struct nft_hash *priv = nft_expr_priv(expr); + struct nft_symhash *priv = nft_expr_priv(expr); + + if (!tb[NFTA_HASH_DREG] || + !tb[NFTA_HASH_MODULUS]) + return -EINVAL; + + if (tb[NFTA_HASH_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); + + priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); + + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); + if (priv->modulus <= 1) + return -ERANGE; + + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_jhash_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_jhash *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg)) goto nla_put_failure; @@ -105,11 +154,34 @@ static int nft_hash_dump(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) + if (!priv->autogen_seed && + nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) goto nla_put_failure; if (priv->offset != 0) if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static int nft_symhash_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_symhash *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) + goto nla_put_failure; + if (priv->offset != 0) + if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM))) + goto nla_put_failure; return 0; nla_put_failure: @@ -117,17 +189,46 @@ nla_put_failure: } static struct nft_expr_type nft_hash_type; -static const struct nft_expr_ops nft_hash_ops = { +static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), - .eval = nft_hash_eval, - .init = nft_hash_init, - .dump = nft_hash_dump, + .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)), + .eval = nft_jhash_eval, + .init = nft_jhash_init, + .dump = nft_jhash_dump, }; +static const struct nft_expr_ops nft_symhash_ops = { + .type = &nft_hash_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), + .eval = nft_symhash_eval, + .init = nft_symhash_init, + .dump = nft_symhash_dump, +}; + +static const struct nft_expr_ops * +nft_hash_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + u32 type; + + if (!tb[NFTA_HASH_TYPE]) + return &nft_jhash_ops; + + type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE])); + switch (type) { + case NFT_HASH_SYM: + return &nft_symhash_ops; + case NFT_HASH_JENKINS: + return &nft_jhash_ops; + default: + break; + } + return ERR_PTR(-EOPNOTSUPP); +} + static struct nft_expr_type nft_hash_type __read_mostly = { .name = "hash", - .ops = &nft_hash_ops, + .select_ops = &nft_hash_select_ops, .policy = nft_hash_policy, .maxattr = NFTA_HASH_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index c6baf412236d..18dd57a52651 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -17,9 +17,8 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> -static DEFINE_SPINLOCK(limit_lock); - struct nft_limit { + spinlock_t lock; u64 last; u64 tokens; u64 tokens_max; @@ -34,7 +33,7 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) u64 now, tokens; s64 delta; - spin_lock_bh(&limit_lock); + spin_lock_bh(&limit->lock); now = ktime_get_ns(); tokens = limit->tokens + now - limit->last; if (tokens > limit->tokens_max) @@ -44,11 +43,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) delta = tokens - cost; if (delta >= 0) { limit->tokens = delta; - spin_unlock_bh(&limit_lock); + spin_unlock_bh(&limit->lock); return limit->invert; } limit->tokens = tokens; - spin_unlock_bh(&limit_lock); + spin_unlock_bh(&limit->lock); return !limit->invert; } @@ -86,6 +85,7 @@ static int nft_limit_init(struct nft_limit *limit, limit->invert = true; } limit->last = ktime_get_ns(); + spin_lock_init(&limit->lock); return 0; } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index e21aea7e5ec8..475570e89ede 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -71,16 +71,10 @@ static int nft_lookup_init(const struct nft_ctx *ctx, tb[NFTA_LOOKUP_SREG] == NULL) return -EINVAL; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask); - if (IS_ERR(set)) { - if (tb[NFTA_LOOKUP_SET_ID]) { - set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_LOOKUP_SET_ID], - genmask); - } - if (IS_ERR(set)) - return PTR_ERR(set); - } + set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET], + tb[NFTA_LOOKUP_SET_ID], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); if (set->flags & NFT_SET_EVAL) return -EOPNOTSUPP; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 11ce016cd479..6ac03d4266c9 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -46,10 +46,6 @@ int nft_masq_init(const struct nft_ctx *ctx, struct nft_masq *priv = nft_expr_priv(expr); int err; - err = nft_masq_validate(ctx, expr, NULL); - if (err) - return err; - if (tb[NFTA_MASQ_FLAGS]) { priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); if (priv->flags & ~NF_NAT_RANGE_MASK) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e1f5ca9b423b..9563ce3c23aa 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -45,16 +45,15 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->len; break; case NFT_META_PROTOCOL: - *dest = 0; - *(__be16 *)dest = skb->protocol; + nft_reg_store16(dest, (__force u16)skb->protocol); break; case NFT_META_NFPROTO: - *dest = nft_pf(pkt); + nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: if (!pkt->tprot_set) goto err; - *dest = pkt->tprot; + nft_reg_store8(dest, pkt->tprot); break; case NFT_META_PRIORITY: *dest = skb->priority; @@ -85,14 +84,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFT_META_IIFTYPE: if (in == NULL) goto err; - *dest = 0; - *(u16 *)dest = in->type; + nft_reg_store16(dest, in->type); break; case NFT_META_OIFTYPE: if (out == NULL) goto err; - *dest = 0; - *(u16 *)dest = out->type; + nft_reg_store16(dest, out->type); break; case NFT_META_SKUID: sk = skb_to_full_sk(skb); @@ -142,19 +139,19 @@ void nft_meta_get_eval(const struct nft_expr *expr, #endif case NFT_META_PKTTYPE: if (skb->pkt_type != PACKET_LOOPBACK) { - *dest = skb->pkt_type; + nft_reg_store8(dest, skb->pkt_type); break; } switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; case NFPROTO_IPV6: - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; case NFPROTO_NETDEV: switch (skb->protocol) { @@ -168,14 +165,14 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; if (ipv4_is_multicast(iph->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; } case htons(ETH_P_IPV6): - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; default: WARN_ON_ONCE(1); @@ -230,7 +227,9 @@ void nft_meta_set_eval(const struct nft_expr *expr, { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; - u32 value = regs->data[meta->sreg]; + u32 *sreg = ®s->data[meta->sreg]; + u32 value = *sreg; + u8 pkt_type; switch (meta->key) { case NFT_META_MARK: @@ -240,9 +239,12 @@ void nft_meta_set_eval(const struct nft_expr *expr, skb->priority = value; break; case NFT_META_PKTTYPE: - if (skb->pkt_type != value && - skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type)) - skb->pkt_type = value; + pkt_type = nft_reg_load8(sreg); + + if (skb->pkt_type != pkt_type && + skb_pkt_type_ok(pkt_type) && + skb_pkt_type_ok(skb->pkt_type)) + skb->pkt_type = pkt_type; break; case NFT_META_NFTRACE: skb->nf_trace = !!value; @@ -370,10 +372,6 @@ int nft_meta_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nft_meta_set_validate(ctx, expr, NULL); - if (err < 0) - return err; - priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); err = nft_validate_register_load(priv->sreg, len); if (err < 0) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 19a7bf3236f9..ed548d06b6dd 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } @@ -138,10 +138,6 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } - err = nft_nat_validate(ctx, expr, NULL); - if (err < 0) - return err; - if (tb[NFTA_NAT_FAMILY] == NULL) return -EINVAL; diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 1ae8c49ca4a1..1dd428fbaaa3 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -116,16 +116,10 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, struct nft_set *set; int err; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask); - if (IS_ERR(set)) { - if (tb[NFTA_OBJREF_SET_ID]) { - set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_OBJREF_SET_ID], - genmask); - } - if (IS_ERR(set)) - return PTR_ERR(set); - } + set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME], + tb[NFTA_OBJREF_SET_ID], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 2d6fe3559912..25e33159be57 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -99,7 +99,8 @@ static int nft_quota_do_init(const struct nlattr * const tb[], return 0; } -static int nft_quota_obj_init(const struct nlattr * const tb[], +static int nft_quota_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], struct nft_object *obj) { struct nft_quota *priv = nft_obj_data(obj); diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 40dcd05146d5..1e66538bf0ff 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -47,10 +47,6 @@ int nft_redir_init(const struct nft_ctx *ctx, unsigned int plen; int err; - err = nft_redir_validate(ctx, expr, NULL); - if (err < 0) - return err; - plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { priv->sreg_proto_min = diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index c64de3f7379d..29f5bd2377b0 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -42,11 +42,6 @@ int nft_reject_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int err; - - err = nft_reject_validate(ctx, expr, NULL); - if (err < 0) - return err; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 9e90a02cb104..5a7fb5ff867d 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -66,11 +66,7 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code, err; - - err = nft_reject_validate(ctx, expr, NULL); - if (err < 0) - return err; + int icmp_code; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 152d226552c1..8ebbc2940f4c 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -15,6 +15,11 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +struct nft_bitmap_elem { + struct list_head head; + struct nft_set_ext ext; +}; + /* This bitmap uses two bits to represent one element. These two bits determine * the element state in the current and the future generation. * @@ -41,13 +46,22 @@ * restore its previous state. */ struct nft_bitmap { - u16 bitmap_size; - u8 bitmap[]; + struct list_head list; + u16 bitmap_size; + u8 bitmap[]; }; -static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off) +static inline void nft_bitmap_location(const struct nft_set *set, + const void *key, + u32 *idx, u32 *off) { - u32 k = (key << 1); + u32 k; + + if (set->klen == 2) + k = *(u16 *)key; + else + k = *(u8 *)key; + k <<= 1; *idx = k / BITS_PER_BYTE; *off = k % BITS_PER_BYTE; @@ -69,26 +83,48 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_cur(net); u32 idx, off; - nft_bitmap_location(*key, &idx, &off); + nft_bitmap_location(set, key, &idx, &off); return nft_bitmap_active(priv->bitmap, idx, off, genmask); } +static struct nft_bitmap_elem * +nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, + u8 genmask) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *be; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (memcmp(nft_set_ext_key(&be->ext), + nft_set_ext_key(&this->ext), set->klen) || + !nft_set_elem_active(&be->ext, genmask)) + continue; + + return be; + } + return NULL; +} + static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **_ext) + struct nft_set_ext **ext) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *new = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); - if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) + be = nft_bitmap_elem_find(set, new, genmask); + if (be) { + *ext = &be->ext; return -EEXIST; + } + nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off); /* Enter 01 state. */ priv->bitmap[idx] |= (genmask << off); + list_add_tail_rcu(&new->head, &priv->list); return 0; } @@ -98,13 +134,14 @@ static void nft_bitmap_remove(const struct net *net, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 00 state. */ priv->bitmap[idx] &= ~(genmask << off); + list_del_rcu(&be->head); } static void nft_bitmap_activate(const struct net *net, @@ -112,74 +149,52 @@ static void nft_bitmap_activate(const struct net *net, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 11 state. */ priv->bitmap[idx] |= (genmask << off); + nft_set_elem_change_active(net, set, &be->ext); } static bool nft_bitmap_flush(const struct net *net, - const struct nft_set *set, void *ext) + const struct nft_set *set, void *_be) { struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); + struct nft_bitmap_elem *be = _be; u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 10 state, similar to deactivation. */ priv->bitmap[idx] &= ~(genmask << off); + nft_set_elem_change_active(net, set, &be->ext); return true; } -static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_set_ext_tmpl tmpl; - struct nft_set_ext *ext; - - nft_set_ext_prepare(&tmpl); - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); - - ext = kzalloc(tmpl.len, GFP_KERNEL); - if (!ext) - return NULL; - - nft_set_ext_init(ext, &tmpl); - memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen); - - return ext; -} - static void *nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *this = elem->priv, *be; u8 genmask = nft_genmask_next(net); - struct nft_set_ext *ext; - u32 idx, off, key = 0; - - memcpy(&key, elem->key.val.data, set->klen); - nft_bitmap_location(key, &idx, &off); + u32 idx, off; - if (!nft_bitmap_active(priv->bitmap, idx, off, genmask)) - return NULL; + nft_bitmap_location(set, elem->key.val.data, &idx, &off); - /* We have no real set extension since this is a bitmap, allocate this - * dummy object that is released from the commit/abort path. - */ - ext = nft_bitmap_ext_alloc(set, elem); - if (!ext) + be = nft_bitmap_elem_find(set, this, genmask); + if (!be) return NULL; /* Enter 10 state. */ priv->bitmap[idx] &= ~(genmask << off); + nft_set_elem_change_active(net, set, &be->ext); - return ext; + return be; } static void nft_bitmap_walk(const struct nft_ctx *ctx, @@ -187,47 +202,23 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, struct nft_set_iter *iter) { const struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext_tmpl tmpl; + struct nft_bitmap_elem *be; struct nft_set_elem elem; - struct nft_set_ext *ext; - int idx, off; - u16 key; - - nft_set_ext_prepare(&tmpl); - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); - - for (idx = 0; idx < priv->bitmap_size; idx++) { - for (off = 0; off < BITS_PER_BYTE; off += 2) { - if (iter->count < iter->skip) - goto cont; - - if (!nft_bitmap_active(priv->bitmap, idx, off, - iter->genmask)) - goto cont; - - ext = kzalloc(tmpl.len, GFP_KERNEL); - if (!ext) { - iter->err = -ENOMEM; - return; - } - nft_set_ext_init(ext, &tmpl); - key = ((idx * BITS_PER_BYTE) + off) >> 1; - memcpy(nft_set_ext_key(ext), &key, set->klen); - - elem.priv = ext; - iter->err = iter->fn(ctx, set, iter, &elem); - - /* On set flush, this dummy extension object is released - * from the commit/abort path. - */ - if (!iter->flush) - kfree(ext); - - if (iter->err < 0) - return; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (iter->count < iter->skip) + goto cont; + if (!nft_set_elem_active(&be->ext, iter->genmask)) + goto cont; + + elem.priv = be; + + iter->err = iter->fn(ctx, set, iter, &elem); + + if (iter->err < 0) + return; cont: - iter->count++; - } + iter->count++; } } @@ -258,6 +249,7 @@ static int nft_bitmap_init(const struct nft_set *set, { struct nft_bitmap *priv = nft_set_priv(set); + INIT_LIST_HEAD(&priv->list); priv->bitmap_size = nft_bitmap_size(set->klen); return 0; @@ -283,6 +275,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_bitmap_ops __read_mostly = { .privsize = nft_bitmap_privsize, + .elemsize = offsetof(struct nft_bitmap_elem, ext), .estimate = nft_bitmap_estimate, .init = nft_bitmap_init, .destroy = nft_bitmap_destroy, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 78dfbf9588b3..e97e2fb53f0a 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -18,9 +18,8 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> -static DEFINE_SPINLOCK(nft_rbtree_lock); - struct nft_rbtree { + rwlock_t lock; struct rb_root root; }; @@ -44,14 +43,14 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this, static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { - const struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; const void *this; int d; - spin_lock_bh(&nft_rbtree_lock); + read_lock_bh(&priv->lock); parent = priv->root.rb_node; while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -75,7 +74,7 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, } if (nft_rbtree_interval_end(rbe)) goto out; - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); *ext = &rbe->ext; return true; @@ -85,12 +84,12 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && !nft_rbtree_interval_end(interval)) { - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); *ext = &interval->ext; return true; } out: - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); return false; } @@ -140,12 +139,13 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { + struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe = elem->priv; int err; - spin_lock_bh(&nft_rbtree_lock); + write_lock_bh(&priv->lock); err = __nft_rbtree_insert(net, set, rbe, ext); - spin_unlock_bh(&nft_rbtree_lock); + write_unlock_bh(&priv->lock); return err; } @@ -157,9 +157,9 @@ static void nft_rbtree_remove(const struct net *net, struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe = elem->priv; - spin_lock_bh(&nft_rbtree_lock); + write_lock_bh(&priv->lock); rb_erase(&rbe->node, &priv->root); - spin_unlock_bh(&nft_rbtree_lock); + write_unlock_bh(&priv->lock); } static void nft_rbtree_activate(const struct net *net, @@ -224,12 +224,12 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { - const struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; - spin_lock_bh(&nft_rbtree_lock); + read_lock_bh(&priv->lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { rbe = rb_entry(node, struct nft_rbtree_elem, node); @@ -242,13 +242,13 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); return; } cont: iter->count++; } - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); } static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) @@ -262,6 +262,7 @@ static int nft_rbtree_init(const struct nft_set *set, { struct nft_rbtree *priv = nft_set_priv(set); + rwlock_init(&priv->lock); priv->root = RB_ROOT; return 0; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 27241a767f17..c64aca611ac5 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -104,7 +104,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); tcp_hdrlen = tcph->doff * 4; - if (len < tcp_hdrlen) + if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr)) return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { @@ -152,6 +152,10 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (len > tcp_hdrlen) return 0; + /* tcph->doff has 4 bits, do not wrap it to 0 */ + if (tcp_hdrlen >= 15 * 4) + return 0; + /* * MSS Option not found ?! add it.. */ diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 80cb7babeb64..df7f1df00330 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -393,7 +393,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, rcu_read_lock(); indev = __in6_dev_get(skb->dev); - if (indev) + if (indev) { + read_lock_bh(&indev->lock); list_for_each_entry(ifa, &indev->addr_list, if_list) { if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) continue; @@ -401,6 +402,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, laddr = &ifa->addr; break; } + read_unlock_bh(&indev->lock); + } rcu_read_unlock(); return laddr ? laddr : daddr; diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index dab962df1787..d27b5f1ea619 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -18,6 +18,7 @@ #include <linux/netfilter/xt_limit.h> struct xt_limit_priv { + spinlock_t lock; unsigned long prev; uint32_t credit; }; @@ -32,8 +33,6 @@ MODULE_ALIAS("ip6t_limit"); * see net/sched/sch_tbf.c in the linux source tree */ -static DEFINE_SPINLOCK(limit_lock); - /* Rusty: This is my (non-mathematically-inclined) understanding of this algorithm. The `average rate' in jiffies becomes your initial amount of credit `credit' and the most credit you can ever have @@ -72,7 +71,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par) struct xt_limit_priv *priv = r->master; unsigned long now = jiffies; - spin_lock_bh(&limit_lock); + spin_lock_bh(&priv->lock); priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY; if (priv->credit > r->credit_cap) priv->credit = r->credit_cap; @@ -80,11 +79,11 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (priv->credit >= r->cost) { /* We're not limited. */ priv->credit -= r->cost; - spin_unlock_bh(&limit_lock); + spin_unlock_bh(&priv->lock); return true; } - spin_unlock_bh(&limit_lock); + spin_unlock_bh(&priv->lock); return false; } @@ -126,6 +125,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par) r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } + spin_lock_init(&priv->lock); + return 0; } diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 4149d3e63589..9aacf2da3d98 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -101,7 +101,7 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST], NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) + netlbl_cipsov4_genl_policy, NULL) != 0) return -EINVAL; nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) @@ -148,7 +148,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) + netlbl_cipsov4_genl_policy, NULL) != 0) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); @@ -170,10 +170,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { - if (nla_validate_nested(nla_a, - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) - goto add_std_failure; + if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) + goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSLVLLOC: @@ -236,7 +236,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) { if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) + netlbl_cipsov4_genl_policy, NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_a, @@ -244,8 +244,9 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { if (nla_validate_nested(nla_a, - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7b73c7c161a9..ee841f00a6ec 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -78,14 +78,6 @@ struct listeners { /* state bits */ #define NETLINK_S_CONGESTED 0x0 -/* flags */ -#define NETLINK_F_KERNEL_SOCKET 0x1 -#define NETLINK_F_RECV_PKTINFO 0x2 -#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 -#define NETLINK_F_RECV_NO_ENOBUFS 0x8 -#define NETLINK_F_LISTEN_ALL_NSID 0x10 -#define NETLINK_F_CAP_ACK 0x20 - static inline int netlink_is_kernel(struct sock *sk) { return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET; @@ -96,6 +88,44 @@ EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); +static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS]; + +static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { + "nlk_cb_mutex-ROUTE", + "nlk_cb_mutex-1", + "nlk_cb_mutex-USERSOCK", + "nlk_cb_mutex-FIREWALL", + "nlk_cb_mutex-SOCK_DIAG", + "nlk_cb_mutex-NFLOG", + "nlk_cb_mutex-XFRM", + "nlk_cb_mutex-SELINUX", + "nlk_cb_mutex-ISCSI", + "nlk_cb_mutex-AUDIT", + "nlk_cb_mutex-FIB_LOOKUP", + "nlk_cb_mutex-CONNECTOR", + "nlk_cb_mutex-NETFILTER", + "nlk_cb_mutex-IP6_FW", + "nlk_cb_mutex-DNRTMSG", + "nlk_cb_mutex-KOBJECT_UEVENT", + "nlk_cb_mutex-GENERIC", + "nlk_cb_mutex-17", + "nlk_cb_mutex-SCSITRANSPORT", + "nlk_cb_mutex-ECRYPTFS", + "nlk_cb_mutex-RDMA", + "nlk_cb_mutex-CRYPTO", + "nlk_cb_mutex-SMC", + "nlk_cb_mutex-23", + "nlk_cb_mutex-24", + "nlk_cb_mutex-25", + "nlk_cb_mutex-26", + "nlk_cb_mutex-27", + "nlk_cb_mutex-28", + "nlk_cb_mutex-29", + "nlk_cb_mutex-30", + "nlk_cb_mutex-31", + "nlk_cb_mutex-MAX_LINKS" +}; + static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); @@ -585,6 +615,9 @@ static int __netlink_create(struct net *net, struct socket *sock, } else { nlk->cb_mutex = &nlk->cb_def_mutex; mutex_init(nlk->cb_mutex); + lockdep_set_class_and_name(nlk->cb_mutex, + nlk_cb_mutex_keys + protocol, + nlk_cb_mutex_key_strings[protocol]); } init_waitqueue_head(&nlk->wait); @@ -1619,6 +1652,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, nlk->flags &= ~NETLINK_F_CAP_ACK; err = 0; break; + case NETLINK_EXT_ACK: + if (val) + nlk->flags |= NETLINK_F_EXT_ACK; + else + nlk->flags &= ~NETLINK_F_EXT_ACK; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1703,6 +1743,15 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_EXT_ACK: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0; + if (put_user(len, optlen) || put_user(val, optval)) + return -EFAULT; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -2234,21 +2283,44 @@ error_free: } EXPORT_SYMBOL(__netlink_dump_start); -void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) +void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack) { struct sk_buff *skb; struct nlmsghdr *rep; struct nlmsgerr *errmsg; size_t payload = sizeof(*errmsg); + size_t tlvlen = 0; struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); + unsigned int flags = 0; /* Error messages get the original request appened, unless the user - * requests to cap the error message. + * requests to cap the error message, and get extra error data if + * requested. */ - if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) - payload += nlmsg_len(nlh); + if (err) { + if (!(nlk->flags & NETLINK_F_CAP_ACK)) + payload += nlmsg_len(nlh); + else + flags |= NLM_F_CAPPED; + if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (extack->_msg) + tlvlen += nla_total_size(strlen(extack->_msg) + 1); + if (extack->bad_attr) + tlvlen += nla_total_size(sizeof(u32)); + } + } else { + flags |= NLM_F_CAPPED; - skb = nlmsg_new(payload, GFP_KERNEL); + if (nlk->flags & NETLINK_F_EXT_ACK && + extack && extack->cookie_len) + tlvlen += nla_total_size(extack->cookie_len); + } + + if (tlvlen) + flags |= NLM_F_ACK_TLVS; + + skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -2264,17 +2336,42 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) } rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - NLMSG_ERROR, payload, 0); + NLMSG_ERROR, payload, flags); errmsg = nlmsg_data(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); + + if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (err) { + if (extack->_msg) + WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, + extack->_msg)); + if (extack->bad_attr && + !WARN_ON((u8 *)extack->bad_attr < in_skb->data || + (u8 *)extack->bad_attr >= in_skb->data + + in_skb->len)) + WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, + (u8 *)extack->bad_attr - + in_skb->data)); + } else { + if (extack->cookie_len) + WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, + extack->cookie_len, + extack->cookie)); + } + } + + nlmsg_end(skb, rep); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); } EXPORT_SYMBOL(netlink_ack); int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, - struct nlmsghdr *)) + struct nlmsghdr *, + struct netlink_ext_ack *)) { + struct netlink_ext_ack extack = {}; struct nlmsghdr *nlh; int err; @@ -2295,13 +2392,13 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_type < NLMSG_MIN_TYPE) goto ack; - err = cb(skb, nlh); + err = cb(skb, nlh, &extack); if (err == -EINTR) goto skip; ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, &extack); skip: msglen = NLMSG_ALIGN(nlh->nlmsg_len); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 4fdb38318977..3490f2430532 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -6,6 +6,15 @@ #include <linux/workqueue.h> #include <net/sock.h> +/* flags */ +#define NETLINK_F_KERNEL_SOCKET 0x1 +#define NETLINK_F_RECV_PKTINFO 0x2 +#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 +#define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 +#define NETLINK_F_CAP_ACK 0x20 +#define NETLINK_F_EXT_ACK 0x40 + #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index a5546249fb10..8faa20b4d457 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -19,6 +19,27 @@ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) nlk->groups); } +static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + u32 flags = 0; + + if (nlk->cb_running) + flags |= NDIAG_FLAG_CB_RUNNING; + if (nlk->flags & NETLINK_F_RECV_PKTINFO) + flags |= NDIAG_FLAG_PKTINFO; + if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) + flags |= NDIAG_FLAG_BROADCAST_ERROR; + if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) + flags |= NDIAG_FLAG_NO_ENOBUFS; + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + flags |= NDIAG_FLAG_LISTEN_ALL_NSID; + if (nlk->flags & NETLINK_F_CAP_ACK) + flags |= NDIAG_FLAG_CAP_ACK; + + return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags); +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct netlink_diag_req *req, u32 portid, u32 seq, u32 flags, int sk_ino) @@ -52,6 +73,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; + if ((req->ndiag_show & NDIAG_SHOW_FLAGS) && + sk_diag_put_flags(sk, skb)) + goto out_nlmsg_trim; + nlmsg_end(skb, nlh); return 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index fb6e10fdb217..10f8b4cff40a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -497,7 +497,8 @@ static int genl_lock_done(struct netlink_callback *cb) static int genl_family_rcv_msg(const struct genl_family *family, struct sk_buff *skb, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { const struct genl_ops *ops; struct net *net = sock_net(skb->sk); @@ -573,7 +574,7 @@ static int genl_family_rcv_msg(const struct genl_family *family, if (attrbuf) { err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - ops->policy); + ops->policy, extack); if (err < 0) goto out; } @@ -584,6 +585,7 @@ static int genl_family_rcv_msg(const struct genl_family *family, info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = attrbuf; + info.extack = extack; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); @@ -605,7 +607,8 @@ out: return err; } -static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { const struct genl_family *family; int err; @@ -617,7 +620,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!family->parallel_ops) genl_lock(); - err = genl_family_rcv_msg(family, skb, nlh); + err = genl_family_rcv_msg(family, skb, nlh, extack); if (!family->parallel_ops) genl_unlock(); @@ -783,8 +786,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) + skb, CTRL_CMD_NEWFAMILY) < 0) { + n--; break; + } } cb->args[0] = n; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 03f3d5c7beb8..529443acd3bc 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -119,7 +119,8 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) u32 idx; rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize, - attrbuf, nfc_genl_family.maxattr, nfc_genl_policy); + attrbuf, nfc_genl_family.maxattr, nfc_genl_policy, + NULL); if (rc < 0) return ERR_PTR(rc); @@ -1161,7 +1162,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr, - nfc_sdp_genl_policy); + nfc_sdp_genl_policy, info->extack); if (rc != 0) { rc = -EINVAL; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c82301ce3fff..e4610676299b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -44,13 +44,10 @@ #include "conntrack.h" #include "vport.h" -static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key, - const struct nlattr *attr, int len); - struct deferred_action { struct sk_buff *skb; const struct nlattr *actions; + int actions_len; /* Store pkt_key clone when creating deferred action. */ struct sw_flow_key pkt_key; @@ -82,14 +79,31 @@ struct action_fifo { struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; }; -struct recirc_keys { +struct action_flow_keys { struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; }; static struct action_fifo __percpu *action_fifos; -static struct recirc_keys __percpu *recirc_keys; +static struct action_flow_keys __percpu *flow_keys; static DEFINE_PER_CPU(int, exec_actions_level); +/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' + * space. Return NULL if out of key spaces. + */ +static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) +{ + struct action_flow_keys *keys = this_cpu_ptr(flow_keys); + int level = this_cpu_read(exec_actions_level); + struct sw_flow_key *key = NULL; + + if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { + key = &keys->key[level - 1]; + *key = *key_; + } + + return key; +} + static void action_fifo_init(struct action_fifo *fifo) { fifo->head = 0; @@ -119,8 +133,9 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo) /* Return true if fifo is not full */ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, - const struct sw_flow_key *key, - const struct nlattr *attr) + const struct sw_flow_key *key, + const struct nlattr *actions, + const int actions_len) { struct action_fifo *fifo; struct deferred_action *da; @@ -129,7 +144,8 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, da = action_fifo_put(fifo); if (da) { da->skb = skb; - da->actions = attr; + da->actions = actions; + da->actions_len = actions_len; da->pkt_key = *key; } @@ -146,6 +162,12 @@ static bool is_flow_key_valid(const struct sw_flow_key *key) return !(key->mac_proto & SW_FLOW_KEY_INVALID); } +static int clone_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + u32 recirc_id, + const struct nlattr *actions, int len, + bool last, bool clone_flow_key); + static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, __be16 ethertype) { @@ -908,72 +930,35 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, return ovs_dp_upcall(dp, skb, key, &upcall, cutlen); } +/* When 'last' is true, sample() should always consume the 'skb'. + * Otherwise, sample() should keep 'skb' intact regardless what + * actions are executed within sample(). + */ static int sample(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, - const struct nlattr *actions, int actions_len) + bool last) { - const struct nlattr *acts_list = NULL; - const struct nlattr *a; - int rem; - u32 cutlen = 0; - - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { - u32 probability; - - switch (nla_type(a)) { - case OVS_SAMPLE_ATTR_PROBABILITY: - probability = nla_get_u32(a); - if (!probability || prandom_u32() > probability) - return 0; - break; - - case OVS_SAMPLE_ATTR_ACTIONS: - acts_list = a; - break; - } - } - - rem = nla_len(acts_list); - a = nla_data(acts_list); - - /* Actions list is empty, do nothing */ - if (unlikely(!rem)) + struct nlattr *actions; + struct nlattr *sample_arg; + int rem = nla_len(attr); + const struct sample_arg *arg; + bool clone_flow_key; + + /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ + sample_arg = nla_data(attr); + arg = nla_data(sample_arg); + actions = nla_next(sample_arg, &rem); + + if ((arg->probability != U32_MAX) && + (!arg->probability || prandom_u32() > arg->probability)) { + if (last) + consume_skb(skb); return 0; - - /* The only known usage of sample action is having a single user-space - * action, or having a truncate action followed by a single user-space - * action. Treat this usage as a special case. - * The output_userspace() should clone the skb to be sent to the - * user space. This skb will be consumed by its caller. - */ - if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) { - struct ovs_action_trunc *trunc = nla_data(a); - - if (skb->len > trunc->max_len) - cutlen = skb->len - trunc->max_len; - - a = nla_next(a, &rem); } - if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE && - nla_is_last(a, rem))) - return output_userspace(dp, skb, key, a, actions, - actions_len, cutlen); - - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) - /* Skip the sample action when out of memory. */ - return 0; - - if (!add_deferred_actions(skb, key, a)) { - if (net_ratelimit()) - pr_warn("%s: deferred actions limit reached, dropping sample action\n", - ovs_dp_name(dp)); - - kfree_skb(skb); - } - return 0; + clone_flow_key = !arg->exec; + return clone_execute(dp, skb, key, 0, actions, rem, last, + clone_flow_key); } static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, @@ -1084,10 +1069,9 @@ static int execute_masked_set_action(struct sk_buff *skb, static int execute_recirc(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *a, int rem) + const struct nlattr *a, bool last) { - struct deferred_action *da; - int level; + u32 recirc_id; if (!is_flow_key_valid(key)) { int err; @@ -1098,43 +1082,8 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, } BUG_ON(!is_flow_key_valid(key)); - if (!nla_is_last(a, rem)) { - /* Recirc action is the not the last action - * of the action list, need to clone the skb. - */ - skb = skb_clone(skb, GFP_ATOMIC); - - /* Skip the recirc action when out of memory, but - * continue on with the rest of the action list. - */ - if (!skb) - return 0; - } - - level = this_cpu_read(exec_actions_level); - if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { - struct recirc_keys *rks = this_cpu_ptr(recirc_keys); - struct sw_flow_key *recirc_key = &rks->key[level - 1]; - - *recirc_key = *key; - recirc_key->recirc_id = nla_get_u32(a); - ovs_dp_process_packet(skb, recirc_key); - - return 0; - } - - da = add_deferred_actions(skb, key, NULL); - if (da) { - da->pkt_key.recirc_id = nla_get_u32(a); - } else { - kfree_skb(skb); - - if (net_ratelimit()) - pr_warn("%s: deferred action limit reached, drop recirc action\n", - ovs_dp_name(dp)); - } - - return 0; + recirc_id = nla_get_u32(a); + return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true); } /* Execute a list of actions against 'skb'. */ @@ -1206,9 +1155,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = pop_vlan(skb, key); break; - case OVS_ACTION_ATTR_RECIRC: - err = execute_recirc(dp, skb, key, a, rem); - if (nla_is_last(a, rem)) { + case OVS_ACTION_ATTR_RECIRC: { + bool last = nla_is_last(a, rem); + + err = execute_recirc(dp, skb, key, a, last); + if (last) { /* If this is the last action, the skb has * been consumed or freed. * Return immediately. @@ -1216,6 +1167,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return err; } break; + } case OVS_ACTION_ATTR_SET: err = execute_set_action(skb, key, nla_data(a)); @@ -1226,9 +1178,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_masked_set_action(skb, key, nla_data(a)); break; - case OVS_ACTION_ATTR_SAMPLE: - err = sample(dp, skb, key, a, attr, len); + case OVS_ACTION_ATTR_SAMPLE: { + bool last = nla_is_last(a, rem); + + err = sample(dp, skb, key, a, last); + if (last) + return err; + break; + } case OVS_ACTION_ATTR_CT: if (!is_flow_key_valid(key)) { @@ -1264,6 +1222,79 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return 0; } +/* Execute the actions on the clone of the packet. The effect of the + * execution does not affect the original 'skb' nor the original 'key'. + * + * The execution may be deferred in case the actions can not be executed + * immediately. + */ +static int clone_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 recirc_id, + const struct nlattr *actions, int len, + bool last, bool clone_flow_key) +{ + struct deferred_action *da; + struct sw_flow_key *clone; + + skb = last ? skb : skb_clone(skb, GFP_ATOMIC); + if (!skb) { + /* Out of memory, skip this action. + */ + return 0; + } + + /* When clone_flow_key is false, the 'key' will not be change + * by the actions, then the 'key' can be used directly. + * Otherwise, try to clone key from the next recursion level of + * 'flow_keys'. If clone is successful, execute the actions + * without deferring. + */ + clone = clone_flow_key ? clone_key(key) : key; + if (clone) { + int err = 0; + + if (actions) { /* Sample action */ + if (clone_flow_key) + __this_cpu_inc(exec_actions_level); + + err = do_execute_actions(dp, skb, clone, + actions, len); + + if (clone_flow_key) + __this_cpu_dec(exec_actions_level); + } else { /* Recirc action */ + clone->recirc_id = recirc_id; + ovs_dp_process_packet(skb, clone); + } + return err; + } + + /* Out of 'flow_keys' space. Defer actions */ + da = add_deferred_actions(skb, key, actions, len); + if (da) { + if (!actions) { /* Recirc action */ + key = &da->pkt_key; + key->recirc_id = recirc_id; + } + } else { + /* Out of per CPU action FIFO space. Drop the 'skb' and + * log an error. + */ + kfree_skb(skb); + + if (net_ratelimit()) { + if (actions) { /* Sample action */ + pr_warn("%s: deferred action limit reached, drop sample action\n", + ovs_dp_name(dp)); + } else { /* Recirc action */ + pr_warn("%s: deferred action limit reached, drop recirc action\n", + ovs_dp_name(dp)); + } + } + } + return 0; +} + static void process_deferred_actions(struct datapath *dp) { struct action_fifo *fifo = this_cpu_ptr(action_fifos); @@ -1278,10 +1309,10 @@ static void process_deferred_actions(struct datapath *dp) struct sk_buff *skb = da->skb; struct sw_flow_key *key = &da->pkt_key; const struct nlattr *actions = da->actions; + int actions_len = da->actions_len; if (actions) - do_execute_actions(dp, skb, key, actions, - nla_len(actions)); + do_execute_actions(dp, skb, key, actions, actions_len); else ovs_dp_process_packet(skb, key); } while (!action_fifo_is_empty(fifo)); @@ -1323,8 +1354,8 @@ int action_fifos_init(void) if (!action_fifos) return -ENOMEM; - recirc_keys = alloc_percpu(struct recirc_keys); - if (!recirc_keys) { + flow_keys = alloc_percpu(struct action_flow_keys); + if (!flow_keys) { free_percpu(action_fifos); return -ENOMEM; } @@ -1335,5 +1366,5 @@ int action_fifos_init(void) void action_fifos_exit(void) { free_percpu(action_fifos); - free_percpu(recirc_keys); + free_percpu(flow_keys); } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e0a87776a010..7b2c2fce408a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net, */ if (nf_ct_is_confirmed(ct)) nf_ct_delete(ct, 0, 0); - else - nf_conntrack_put(&ct->ct_general); + + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, 0); return false; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9c62b6325f7a..7b17da9a94a0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1353,7 +1353,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int err; err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, - OVS_FLOW_ATTR_MAX, flow_policy); + OVS_FLOW_ATTR_MAX, flow_policy, NULL); if (err) return err; ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1c6e9377436d..da931bdef8a7 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -34,8 +34,6 @@ #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 -#define SAMPLE_ACTION_DEPTH 3 - /** * struct dp_stats_percpu - per-cpu packet processing statistics for a given * datapath. diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d4bb8eb63f2..3f76cb765e5b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) /* Link layer. */ clear_vlan(key); - if (key->mac_proto == MAC_PROTO_NONE) { + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { if (unlikely(eth_type_vlan(skb->protocol))) return -EINVAL; @@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { - return key_extract(skb, key); + int res; + + res = key_extract(skb, key); + if (!res) + key->mac_proto &= ~SW_FLOW_KEY_INVALID; + + return res; } static int key_extract_mac_proto(struct sk_buff *skb) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 6f5fa50f716d..7e1d8a2afa63 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -59,6 +59,39 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +static bool actions_may_change_flow(const struct nlattr *actions) +{ + struct nlattr *nla; + int rem; + + nla_for_each_nested(nla, actions, rem) { + u16 action = nla_type(nla); + + switch (action) { + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_RECIRC: + case OVS_ACTION_ATTR_TRUNC: + case OVS_ACTION_ATTR_USERSPACE: + break; + + case OVS_ACTION_ATTR_CT: + case OVS_ACTION_ATTR_HASH: + case OVS_ACTION_ATTR_POP_ETH: + case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_ETH: + case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_PUSH_VLAN: + case OVS_ACTION_ATTR_SAMPLE: + case OVS_ACTION_ATTR_SET: + case OVS_ACTION_ATTR_SET_MASKED: + default: + return true; + } + } + return false; +} + static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) { @@ -604,7 +637,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, ipv4 = true; break; case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: - SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src, nla_get_in6_addr(a), is_mask); ipv6 = true; break; @@ -665,6 +698,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_VXLAN_OPT; opts_type = type; break; + case OVS_TUNNEL_KEY_ATTR_PAD: + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); @@ -2021,18 +2056,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, - int depth, struct sw_flow_actions **sfa, + struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, - const struct sw_flow_key *key, int depth, + const struct sw_flow_key *key, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci, bool log) + __be16 eth_type, __be16 vlan_tci, + bool log, bool last) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; const struct nlattr *a; - int rem, start, err, st_acts; + int rem, start, err; + struct sample_arg arg; memset(attrs, 0, sizeof(attrs)); nla_for_each_nested(a, attr, rem) { @@ -2056,20 +2093,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; - err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32), log); + + /* When both skb and flow may be changed, put the sample + * into a deferred fifo. On the other hand, if only skb + * may be modified, the actions can be executed in place. + * + * Do this analysis at the flow installation time. + * Set 'clone_action->exec' to true if the actions can be + * executed without being deferred. + * + * If the sample is the last action, it can always be excuted + * rather than deferred. + */ + arg.exec = last || !actions_may_change_flow(actions); + arg.probability = nla_get_u32(probability); + + err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg), + log); if (err) return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); - if (st_acts < 0) - return st_acts; - err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, + err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, log); + if (err) return err; - add_nested_action_end(*sfa, st_acts); add_nested_action_end(*sfa, start); return 0; @@ -2378,8 +2427,8 @@ static int validate_userspace(const struct nlattr *attr) struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; - error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, - attr, userspace_policy); + error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, + userspace_policy, NULL); if (error) return error; @@ -2406,16 +2455,13 @@ static int copy_action(const struct nlattr *from, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, - int depth, struct sw_flow_actions **sfa, + struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; - if (depth >= SAMPLE_ACTION_DEPTH) - return -EOVERFLOW; - nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -2553,13 +2599,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return err; break; - case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(net, a, key, depth, sfa, - eth_type, vlan_tci, log); + case OVS_ACTION_ATTR_SAMPLE: { + bool last = nla_is_last(a, rem); + + err = validate_and_copy_sample(net, a, key, sfa, + eth_type, vlan_tci, + log, last); if (err) return err; skip_copy = true; break; + } case OVS_ACTION_ATTR_CT: err = ovs_ct_copy_action(net, a, key, sfa, log); @@ -2613,7 +2663,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return PTR_ERR(*sfa); (*sfa)->orig_len = nla_len(attr); - err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, + err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, key->eth.vlan.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); @@ -2621,39 +2671,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return err; } -static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +static int sample_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) { - const struct nlattr *a; - struct nlattr *start; - int err = 0, rem; + struct nlattr *start, *ac_start = NULL, *sample_arg; + int err = 0, rem = nla_len(attr); + const struct sample_arg *arg; + struct nlattr *actions; start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); if (!start) return -EMSGSIZE; - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - struct nlattr *st_sample; + sample_arg = nla_data(attr); + arg = nla_data(sample_arg); + actions = nla_next(sample_arg, &rem); - switch (type) { - case OVS_SAMPLE_ATTR_PROBABILITY: - if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, - sizeof(u32), nla_data(a))) - return -EMSGSIZE; - break; - case OVS_SAMPLE_ATTR_ACTIONS: - st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); - if (!st_sample) - return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); - if (err) - return err; - nla_nest_end(skb, st_sample); - break; - } + if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) { + err = -EMSGSIZE; + goto out; + } + + ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!ac_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(actions, rem, skb); + +out: + if (err) { + nla_nest_cancel(skb, ac_start); + nla_nest_cancel(skb, start); + } else { + nla_nest_end(skb, ac_start); + nla_nest_end(skb, start); } - nla_nest_end(skb, start); return err; } diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 7eb955e453e6..869acb3b3d3f 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -70,7 +70,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr, if (nla_len(attr) < sizeof(struct nlattr)) return -EINVAL; - err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy, + NULL); if (err < 0) return err; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a0dbe7ca8f72..8489beff5c25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + if (val > INT_MAX) + return -EINVAL; po->tp_reserve = val; return 0; } @@ -4193,8 +4195,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) @@ -4205,6 +4207,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; + if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index bc5ee5fbe6ae..363799bf97f6 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -78,7 +78,8 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy, + NULL); if (err < 0) return err; @@ -243,7 +244,8 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy, + NULL); if (err < 0) return err; diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig index b83c6807a5ae..326fd97444f5 100644 --- a/net/qrtr/Kconfig +++ b/net/qrtr/Kconfig @@ -16,7 +16,7 @@ if QRTR config QRTR_SMD tristate "SMD IPC Router channels" - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG || (COMPILE_TEST && RPMSG=n) ---help--- Say Y here to support SMD based ipcrouter channels. SMD is the most common transport for IPC Router. diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index ae5ac175b2be..7fdbb34002f5 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -957,7 +957,7 @@ static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy); + rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy, NULL); if (rc < 0) return rc; diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c index 0d11132b3370..50615d5efac1 100644 --- a/net/qrtr/smd.c +++ b/net/qrtr/smd.c @@ -14,21 +14,21 @@ #include <linux/module.h> #include <linux/skbuff.h> -#include <linux/soc/qcom/smd.h> +#include <linux/rpmsg.h> #include "qrtr.h" struct qrtr_smd_dev { struct qrtr_endpoint ep; - struct qcom_smd_channel *channel; + struct rpmsg_endpoint *channel; struct device *dev; }; /* from smd to qrtr */ -static int qcom_smd_qrtr_callback(struct qcom_smd_channel *channel, - const void *data, size_t len) +static int qcom_smd_qrtr_callback(struct rpmsg_device *rpdev, + void *data, int len, void *priv, u32 addr) { - struct qrtr_smd_dev *qdev = qcom_smd_get_drvdata(channel); + struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev); int rc; if (!qdev) @@ -54,7 +54,7 @@ static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) if (rc) goto out; - rc = qcom_smd_send(qdev->channel, skb->data, skb->len); + rc = rpmsg_send(qdev->channel, skb->data, skb->len); out: if (rc) @@ -64,57 +64,55 @@ out: return rc; } -static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev) +static int qcom_smd_qrtr_probe(struct rpmsg_device *rpdev) { struct qrtr_smd_dev *qdev; int rc; - qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL); + qdev = devm_kzalloc(&rpdev->dev, sizeof(*qdev), GFP_KERNEL); if (!qdev) return -ENOMEM; - qdev->channel = sdev->channel; - qdev->dev = &sdev->dev; + qdev->channel = rpdev->ept; + qdev->dev = &rpdev->dev; qdev->ep.xmit = qcom_smd_qrtr_send; rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); if (rc) return rc; - qcom_smd_set_drvdata(sdev->channel, qdev); - dev_set_drvdata(&sdev->dev, qdev); + dev_set_drvdata(&rpdev->dev, qdev); - dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n"); + dev_dbg(&rpdev->dev, "Qualcomm SMD QRTR driver probed\n"); return 0; } -static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev) +static void qcom_smd_qrtr_remove(struct rpmsg_device *rpdev) { - struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); + struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev); qrtr_endpoint_unregister(&qdev->ep); - dev_set_drvdata(&sdev->dev, NULL); + dev_set_drvdata(&rpdev->dev, NULL); } -static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = { +static const struct rpmsg_device_id qcom_smd_qrtr_smd_match[] = { { "IPCRTR" }, {} }; -static struct qcom_smd_driver qcom_smd_qrtr_driver = { +static struct rpmsg_driver qcom_smd_qrtr_driver = { .probe = qcom_smd_qrtr_probe, .remove = qcom_smd_qrtr_remove, .callback = qcom_smd_qrtr_callback, - .smd_match_table = qcom_smd_qrtr_smd_match, - .driver = { + .id_table = qcom_smd_qrtr_smd_match, + .drv = { .name = "qcom_smd_qrtr", - .owner = THIS_MODULE, }, }; -module_qcom_smd_driver(qcom_smd_qrtr_driver); +module_rpmsg_driver(qcom_smd_qrtr_driver); MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); MODULE_LICENSE("GPL v2"); diff --git a/net/rds/connection.c b/net/rds/connection.c index 1fa75ab7b733..6a5ebdea7d2e 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -333,11 +333,19 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rds_conn_path_reset(cp); if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, + RDS_CONN_DOWN) && + !rds_conn_path_transition(cp, RDS_CONN_ERROR, RDS_CONN_DOWN)) { /* This can happen - eg when we're in the middle of tearing * down the connection, and someone unloads the rds module. - * Quite reproduceable with loopback connections. + * Quite reproducible with loopback connections. * Mostly harmless. + * + * Note that this also happens with rds-tcp because + * we could have triggered rds_conn_path_drop in irq + * mode from rds_tcp_state change on the receipt of + * a FIN, thus we need to recheck for RDS_CONN_ERROR + * here. */ rds_conn_path_error(cp, "%s: failed to transition " "to state DOWN, current state " diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 1c38d2c7caa8..80fb6f63e768 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -702,9 +702,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, event->param.conn.initiator_depth); /* rdma_accept() calls rdma_reject() internally if it fails */ - err = rdma_accept(cm_id, &conn_param); - if (err) - rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); + if (rdma_accept(cm_id, &conn_param)) + rds_ib_conn_error(conn, "rdma_accept failed\n"); out: if (conn) diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 4fe8f4fec4ee..86ef907067bb 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -78,17 +78,15 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) return ibmr; out_no_cigar: - if (ibmr) { - if (fmr->fmr) - ib_dealloc_fmr(fmr->fmr); - kfree(ibmr); - } + kfree(ibmr); atomic_dec(&pool->item_count); + return ERR_PTR(err); } -int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, - struct scatterlist *sg, unsigned int nents) +static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, + struct rds_ib_mr *ibmr, struct scatterlist *sg, + unsigned int nents) { struct ib_device *dev = rds_ibdev->dev; struct rds_ib_fmr *fmr = &ibmr->u.fmr; @@ -114,29 +112,39 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); if (dma_addr & ~PAGE_MASK) { - if (i > 0) + if (i > 0) { + ib_dma_unmap_sg(dev, sg, nents, + DMA_BIDIRECTIONAL); return -EINVAL; - else + } else { ++page_cnt; + } } if ((dma_addr + dma_len) & ~PAGE_MASK) { - if (i < sg_dma_len - 1) + if (i < sg_dma_len - 1) { + ib_dma_unmap_sg(dev, sg, nents, + DMA_BIDIRECTIONAL); return -EINVAL; - else + } else { ++page_cnt; + } } len += dma_len; } page_cnt += len >> PAGE_SHIFT; - if (page_cnt > ibmr->pool->fmr_attr.max_pages) + if (page_cnt > ibmr->pool->fmr_attr.max_pages) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); return -EINVAL; + } dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, rdsibdev_to_node(rds_ibdev)); - if (!dma_pages) + if (!dma_pages) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); return -ENOMEM; + } page_cnt = 0; for (i = 0; i < sg_dma_len; ++i) { @@ -149,8 +157,10 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, } ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr); - if (ret) + if (ret) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); goto out; + } /* Success - we successfully remapped the MR, so we can * safely tear down the old mapping. diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 5d6e98a79a5e..0ea4ab017a8c 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -125,8 +125,6 @@ void rds_ib_mr_exit(void); void __rds_ib_teardown_mr(struct rds_ib_mr *); void rds_ib_teardown_mr(struct rds_ib_mr *); struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int); -int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *, - struct scatterlist *, unsigned int); struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *); int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **); struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *, diff --git a/net/rds/threads.c b/net/rds/threads.c index e36e333a0aa0..3e447d056d09 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -156,7 +156,7 @@ void rds_connect_worker(struct work_struct *work) struct rds_connection *conn = cp->cp_conn; int ret; - if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) + if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) return; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 26a7b1db1361..7486926e60a8 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -740,6 +740,25 @@ static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, } /* + * Abort a call due to a protocol error. + */ +static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + const char *eproto_why, + const char *why, + u32 abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); + return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); +} + +#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ + __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ + (abort_why), (abort_code)) + +/* * conn_client.c */ extern unsigned int rxrpc_max_client_connections; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 0ed181f53f32..1752fcf8e8f1 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -413,11 +413,11 @@ found_service: case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, ECONNABORTED); + conn->remote_abort, -ECONNABORTED); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, ECONNABORTED); + conn->local_abort, -ECONNABORTED); break; default: BUG(); @@ -600,7 +600,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED); + __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED); abort = true; /* fall through */ case RXRPC_CALL_COMPLETE: diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 97a17ada4431..7a77844aab16 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -386,7 +386,7 @@ recheck_state: now = ktime_get_real(); if (ktime_before(call->expire_at, now)) { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d79cd36987a9..47f7f4205653 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -486,7 +486,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); list_del(&call->accept_link); - rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); rxrpc_put_call(call, rxrpc_call_put); } @@ -494,7 +494,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_got); - rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET); rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index c3be03e8d098..e8dea0d49e7f 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -550,6 +550,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, call->cid = conn->proto.cid | channel; call->call_id = call_id; + trace_rxrpc_connect_call(call); _net("CONNECT call %08x:%08x as call %d on conn %d", call->cid, call->call_id, call->debug_id, conn->debug_id); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 3f9d8d7ec632..46babcf82ce8 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -168,7 +168,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, * generate a connection-level abort */ static int rxrpc_abort_connection(struct rxrpc_connection *conn, - u32 error, u32 abort_code) + int error, u32 abort_code) { struct rxrpc_wire_header whdr; struct msghdr msg; @@ -275,16 +275,23 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, rxrpc_conn_retransmit_call(conn, skb); return 0; + case RXRPC_PACKET_TYPE_BUSY: + /* Just ignore BUSY packets for now. */ + return 0; + case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &wtmp, sizeof(wtmp)) < 0) + &wtmp, sizeof(wtmp)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_abort")); return -EPROTO; + } abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED); + abort_code, -ECONNABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -323,7 +330,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; default: - _leave(" = -EPROTO [%u]", sp->hdr.type); + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_conn_pkt")); return -EPROTO; } } @@ -366,7 +374,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) abort: _debug("abort %d, %d", ret, abort_code); - rxrpc_abort_connection(conn, -ret, abort_code); + rxrpc_abort_connection(conn, ret, abort_code); _leave(" [aborted]"); } @@ -415,9 +423,8 @@ requeue_and_leave: goto out; protocol_error: - if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) + if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - _leave(" [EPROTO]"); goto out; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 18b2ad8be8e2..45dba732a3b4 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -30,7 +30,7 @@ static void rxrpc_proto_abort(const char *why, struct rxrpc_call *call, rxrpc_seq_t seq) { - if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) { + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) { set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -665,6 +665,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (rwind > call->tx_winsize) wake = true; + trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, + ntohl(ackinfo->rwind), wake); call->tx_winsize = rwind; } @@ -877,7 +879,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) } /* - * Process an ABORT packet. + * Process an ABORT packet directed at a call. */ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) { @@ -892,10 +894,12 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) &wtmp, sizeof(wtmp)) >= 0) abort_code = ntohl(wtmp); + trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code); + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED)) + abort_code, -ECONNABORTED)) rxrpc_notify_socket(call); } @@ -958,7 +962,7 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, case RXRPC_CALL_COMPLETE: break; default: - if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) { + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) { set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -1017,8 +1021,11 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) struct rxrpc_wire_header whdr; /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_hdr")); return -EBADMSG; + } memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 7d4375e557e6..af276f173b10 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -46,7 +46,10 @@ static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) { - *_abort_code = RX_PROTOCOL_ERROR; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("chall_none")); return -EPROTO; } @@ -54,7 +57,10 @@ static int none_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) { - *_abort_code = RX_PROTOCOL_ERROR; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("resp_none")); return -EPROTO; } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bf13b8470c9a..1ed9c0c2e94f 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -296,7 +296,7 @@ void rxrpc_peer_error_distributor(struct work_struct *work) hlist_del_init(&call->error_link); rxrpc_see_call(call); - if (rxrpc_set_call_completion(call, compl, 0, error)) + if (rxrpc_set_call_completion(call, compl, 0, -error)) rxrpc_notify_socket(call); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 3e2f1a8e9c5b..f9caf3b77509 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -83,11 +83,11 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); break; case RXRPC_CALL_NETWORK_ERROR: - tmp = call->error; + tmp = -call->error; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); break; case RXRPC_CALL_LOCAL_ERROR: - tmp = call->error; + tmp = -call->error; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); break; default: @@ -682,14 +682,16 @@ out: return ret; short_data: + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data")); ret = -EBADMSG; goto out; excess_data: + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data")); ret = -EMSGSIZE; goto out; call_complete: *_abort = call->abort_code; - ret = -call->error; + ret = call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { ret = 1; if (size > 0) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 4374e7b9c7bf..1bb9b2ccc267 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -148,15 +148,13 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, u32 data_size, void *sechdr) { - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxkad_level1_hdr hdr; struct rxrpc_crypt iv; struct scatterlist sg; u16 check; - sp = rxrpc_skb(skb); - _enter(""); check = sp->hdr.seq ^ call->call_id; @@ -323,6 +321,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_crypt iv; struct scatterlist sg[16]; struct sk_buff *trailer; + bool aborted; u32 data_size, buf; u16 check; int nsg; @@ -330,7 +329,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, _enter(""); if (len < 8) { - rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H", + RXKADSEALEDINCON); goto protocol_error; } @@ -355,7 +355,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, /* Extract the decrypted packet length */ if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { - rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1", + RXKADDATALEN); goto protocol_error; } offset += sizeof(sechdr); @@ -368,12 +369,14 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C", + RXKADSEALEDINCON); goto protocol_error; } if (data_size > len) { - rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L", + RXKADDATALEN); goto protocol_error; } @@ -381,8 +384,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO"); + if (aborted) + rxrpc_send_abort_packet(call); return -EPROTO; nomem: @@ -403,6 +406,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; struct sk_buff *trailer; + bool aborted; u32 data_size, buf; u16 check; int nsg; @@ -410,7 +414,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, _enter(",{%d}", skb->len); if (len < 8) { - rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H", + RXKADSEALEDINCON); goto protocol_error; } @@ -445,7 +450,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, /* Extract the decrypted packet length */ if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { - rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2", + RXKADDATALEN); goto protocol_error; } offset += sizeof(sechdr); @@ -458,12 +464,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C", + RXKADSEALEDINCON); goto protocol_error; } if (data_size > len) { - rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L", + RXKADDATALEN); goto protocol_error; } @@ -471,8 +479,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO"); + if (aborted) + rxrpc_send_abort_packet(call); return -EPROTO; nomem: @@ -491,6 +499,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg; + bool aborted; u16 cksum; u32 x, y; @@ -522,10 +531,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, cksum = 1; /* zero checksums are not permitted */ if (cksum != expected_cksum) { - rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO [csum failed]"); - return -EPROTO; + aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK", + RXKADSEALEDINCON); + goto protocol_error; } switch (call->conn->params.security_level) { @@ -538,6 +546,11 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, default: return -ENOANO; } + +protocol_error: + if (aborted) + rxrpc_send_abort_packet(call); + return -EPROTO; } /* @@ -754,22 +767,23 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_response resp __attribute__((aligned(8))); /* must be aligned for crypto */ struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + const char *eproto; u32 version, nonce, min_level, abort_code; int ret; _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key)); - if (!conn->params.key) { - _leave(" = -EPROTO [no key]"); - return -EPROTO; - } + eproto = tracepoint_string("chall_no_key"); + abort_code = RX_PROTOCOL_ERROR; + if (!conn->params.key) + goto protocol_error; + abort_code = RXKADEXPIRED; ret = key_validate(conn->params.key); - if (ret < 0) { - *_abort_code = RXKADEXPIRED; - return ret; - } + if (ret < 0) + goto other_error; + eproto = tracepoint_string("chall_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &challenge, sizeof(challenge)) < 0) @@ -782,13 +796,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }", sp->hdr.serial, version, nonce, min_level); + eproto = tracepoint_string("chall_ver"); abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) goto protocol_error; abort_code = RXKADLEVELFAIL; + ret = -EACCES; if (conn->params.security_level < min_level) - goto protocol_error; + goto other_error; token = conn->params.key->payload.data[0]; @@ -815,28 +831,34 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, return rxkad_send_response(conn, &sp->hdr, &resp, token->kad); protocol_error: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + ret = -EPROTO; +other_error: *_abort_code = abort_code; - _leave(" = -EPROTO [%d]", abort_code); - return -EPROTO; + return ret; } /* * decrypt the kerberos IV ticket in the response */ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, + struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, time_t *_expiry, u32 *_abort_code) { struct skcipher_request *req; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv, key; struct scatterlist sg[1]; struct in_addr addr; unsigned int life; + const char *eproto; time_t issue, now; bool little_endian; int ret; + u32 abort_code; u8 *p, *q, *name, *end; _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key)); @@ -847,11 +869,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, if (ret < 0) { switch (ret) { case -EKEYEXPIRED: - *_abort_code = RXKADEXPIRED; - goto error; + abort_code = RXKADEXPIRED; + goto other_error; default: - *_abort_code = RXKADNOAUTH; - goto error; + abort_code = RXKADNOAUTH; + goto other_error; } } @@ -860,13 +882,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); + ret = -ENOMEM; req = skcipher_request_alloc(conn->server_key->payload.data[0], GFP_NOFS); - if (!req) { - *_abort_code = RXKADNOAUTH; - ret = -ENOMEM; - goto error; - } + if (!req) + goto temporary_error; sg_init_one(&sg[0], ticket, ticket_len); skcipher_request_set_callback(req, 0, NULL, NULL); @@ -877,11 +897,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p = ticket; end = p + ticket_len; -#define Z(size) \ +#define Z(field) \ ({ \ u8 *__str = p; \ + eproto = tracepoint_string("rxkad_bad_"#field); \ q = memchr(p, 0, end - p); \ - if (!q || q - p > (size)) \ + if (!q || q - p > (field##_SZ)) \ goto bad_ticket; \ for (; p < q; p++) \ if (!isprint(*p)) \ @@ -896,17 +917,18 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p++; /* extract the authentication name */ - name = Z(ANAME_SZ); + name = Z(ANAME); _debug("KIV ANAME: %s", name); /* extract the principal's instance */ - name = Z(INST_SZ); + name = Z(INST); _debug("KIV INST : %s", name); /* extract the principal's authentication domain */ - name = Z(REALM_SZ); + name = Z(REALM); _debug("KIV REALM: %s", name); + eproto = tracepoint_string("rxkad_bad_len"); if (end - p < 4 + 8 + 4 + 2) goto bad_ticket; @@ -941,36 +963,37 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, /* check the ticket is in date */ if (issue > now) { - *_abort_code = RXKADNOAUTH; + abort_code = RXKADNOAUTH; ret = -EKEYREJECTED; - goto error; + goto other_error; } if (issue < now - life) { - *_abort_code = RXKADEXPIRED; + abort_code = RXKADEXPIRED; ret = -EKEYEXPIRED; - goto error; + goto other_error; } *_expiry = issue + life; /* get the service name */ - name = Z(SNAME_SZ); + name = Z(SNAME); _debug("KIV SNAME: %s", name); /* get the service instance name */ - name = Z(INST_SZ); + name = Z(INST); _debug("KIV SINST: %s", name); - - ret = 0; -error: - _leave(" = %d", ret); - return ret; + return 0; bad_ticket: - *_abort_code = RXKADBADTICKET; - ret = -EBADMSG; - goto error; + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + abort_code = RXKADBADTICKET; + ret = -EPROTO; +other_error: + *_abort_code = abort_code; + return ret; +temporary_error: + return ret; } /* @@ -1020,6 +1043,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, __attribute__((aligned(8))); /* must be aligned for crypto */ struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; + const char *eproto; time_t expiry; void *ticket; u32 abort_code, version, kvno, ticket_len, level; @@ -1028,6 +1052,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + eproto = tracepoint_string("rxkad_rsp_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &response, sizeof(response)) < 0) @@ -1041,40 +1066,43 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); + eproto = tracepoint_string("rxkad_rsp_ver"); abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) goto protocol_error; + eproto = tracepoint_string("rxkad_rsp_tktlen"); abort_code = RXKADTICKETLEN; if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) goto protocol_error; + eproto = tracepoint_string("rxkad_rsp_unkkey"); abort_code = RXKADUNKNOWNKEY; if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) goto protocol_error; /* extract the kerberos ticket and decrypt and decode it */ + ret = -ENOMEM; ticket = kmalloc(ticket_len, GFP_NOFS); if (!ticket) - return -ENOMEM; + goto temporary_error; + eproto = tracepoint_string("rxkad_tkt_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), ticket, ticket_len) < 0) goto protocol_error_free; - ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, - &expiry, &abort_code); - if (ret < 0) { - *_abort_code = abort_code; - kfree(ticket); - return ret; - } + ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, + &expiry, _abort_code); + if (ret < 0) + goto temporary_error_free; /* use the session key from inside the ticket to decrypt the * response */ rxkad_decrypt_response(conn, &response, &session_key); + eproto = tracepoint_string("rxkad_rsp_param"); abort_code = RXKADSEALEDINCON; if (ntohl(response.encrypted.epoch) != conn->proto.epoch) goto protocol_error_free; @@ -1085,6 +1113,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, csum = response.encrypted.checksum; response.encrypted.checksum = 0; rxkad_calc_response_checksum(&response); + eproto = tracepoint_string("rxkad_rsp_csum"); if (response.encrypted.checksum != csum) goto protocol_error_free; @@ -1093,11 +1122,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxrpc_call *call; u32 call_id = ntohl(response.encrypted.call_id[i]); + eproto = tracepoint_string("rxkad_rsp_callid"); if (call_id > INT_MAX) goto protocol_error_unlock; + eproto = tracepoint_string("rxkad_rsp_callctr"); if (call_id < conn->channels[i].call_counter) goto protocol_error_unlock; + + eproto = tracepoint_string("rxkad_rsp_callst"); if (call_id > conn->channels[i].call_counter) { call = rcu_dereference_protected( conn->channels[i].call, @@ -1109,10 +1142,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, } spin_unlock(&conn->channel_lock); + eproto = tracepoint_string("rxkad_rsp_seq"); abort_code = RXKADOUTOFSEQUENCE; if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1) goto protocol_error_free; + eproto = tracepoint_string("rxkad_rsp_level"); abort_code = RXKADLEVELFAIL; level = ntohl(response.encrypted.level); if (level > RXRPC_SECURITY_ENCRYPT) @@ -1123,10 +1158,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * this the connection security can be handled in exactly the same way * as for a client connection */ ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); - if (ret < 0) { - kfree(ticket); - return ret; - } + if (ret < 0) + goto temporary_error_free; kfree(ticket); _leave(" = 0"); @@ -1137,9 +1170,18 @@ protocol_error_unlock: protocol_error_free: kfree(ticket); protocol_error: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); *_abort_code = abort_code; - _leave(" = -EPROTO [%d]", abort_code); return -EPROTO; + +temporary_error_free: + kfree(ticket); +temporary_error: + /* Ignore the response packet if we got a temporary error such as + * ENOMEM. We just want to send the challenge again. Note that we + * also come out this way if the ticket decryption fails. + */ + return ret; } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 97ab214ca411..96ffa5d5733b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -556,7 +556,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { ret = 0; - if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) + if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED)) ret = rxrpc_send_abort_packet(call); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; @@ -623,7 +623,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, read_unlock_bh(&call->state_lock); break; default: - /* Request phase complete for this client call */ + /* Request phase complete for this client call */ + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send")); ret = -EPROTO; break; } @@ -642,20 +643,24 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @error: Local error value * @why: 3-char string indicating why. * - * Allow a kernel service to abort a call, if it's still in an abortable state. + * Allow a kernel service to abort a call, if it's still in an abortable state + * and return true if the call was aborted, false if it was already complete. */ -void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, +bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, u32 abort_code, int error, const char *why) { + bool aborted; + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); - if (rxrpc_abort_call(why, call, 0, abort_code, error)) + aborted = rxrpc_abort_call(why, call, 0, abort_code, error); + if (aborted) rxrpc_send_abort_packet(call); mutex_unlock(&call->user_mutex); - _leave(""); + return aborted; } EXPORT_SYMBOL(rxrpc_kernel_abort_call); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index b70aa57319ea..79d875c6e8a0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -557,7 +557,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, int err; if (name == NULL) { - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); if (err < 0) goto err_out; err = -EINVAL; @@ -654,7 +654,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, int err; int i; - err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); if (err < 0) return err; @@ -786,7 +786,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, int index; int err; - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); if (err < 0) goto err_out; @@ -835,7 +835,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, b = skb_tail_pointer(skb); - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); if (err < 0) goto err_out; @@ -921,7 +921,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, struct tc_action *act; LIST_HEAD(actions); - ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); + ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); if (ret < 0) return ret; @@ -1004,7 +1004,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); + ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL, + NULL); if (ret < 0) return ret; @@ -1051,19 +1052,20 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n) struct nlattr *nla[TCAA_MAX + 1]; struct nlattr *kind; - if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0) + if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, + NULL, NULL) < 0) return NULL; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), - NLMSG_ALIGN(nla_len(tb1)), NULL) < 0) + NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) return NULL; if (tb[1] == NULL) return NULL; - if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0) + if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 520baa41cba3..d33947d6e9d0 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -283,7 +283,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index f9bb43c25697..2155bc6c6a1e 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -109,7 +109,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy); + ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy, + NULL); if (ret < 0) return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e978ccd4402c..ab6fdbd34db7 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -59,7 +59,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy); + err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy, NULL); if (err < 0) return err; @@ -181,6 +181,9 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, struct tcphdr *tcph; const struct iphdr *iph; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + return 1; + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; @@ -202,6 +205,9 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, struct tcphdr *tcph; const struct ipv6hdr *ip6h; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + return 1; + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; @@ -225,6 +231,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, const struct iphdr *iph; u16 ul; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + return 1; + /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -278,6 +287,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, const struct ipv6hdr *ip6h; u16 ul; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + return 1; + /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e6c874a2b283..99afe8b1f1fb 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -73,7 +73,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy); + err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 71e7ff22f7c9..c5dec308b8b1 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -443,7 +443,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, int ret = 0; int err; - err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy); + err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL); if (err < 0) return err; @@ -514,7 +514,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (tb[TCA_IFE_METALST]) { err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], - NULL); + NULL, NULL); if (err) { metadata_parse_err: if (exists) @@ -603,8 +603,8 @@ nla_put_failure: return -1; } -int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, - u16 metaid, u16 mlen, void *mdata) +static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, + u16 metaid, u16 mlen, void *mdata) { struct tcf_meta_info *e; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 992ef8d624f1..36f0ced9e60c 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -107,7 +107,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy); + err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index af49c7dca860..1b5549ababd4 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -87,7 +87,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy); + ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL); if (ret < 0) return ret; if (tb[TCA_MIRRED_PARMS] == NULL) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 9b6aec665495..9016ab8a0649 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -50,7 +50,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy); + err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c1310472f620..164b5ac094be 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -72,7 +72,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, } err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka, - pedit_key_ex_policy); + pedit_key_ex_policy, NULL); if (err) goto err_out; @@ -147,7 +147,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy); + err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0ba91d1ce994..f42008b29311 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -90,7 +90,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy); + err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 0b8217b4763f..59d6645a4007 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -50,7 +50,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy); + ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] || diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 823a73ad0c60..43605e7ce051 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -94,7 +94,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy); + err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 06ccae3c12ee..6b3e65d7de0c 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -82,7 +82,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy); + err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index c736627f8f4a..a73c4bbcada2 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -103,7 +103,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index e3a58e021198..b9a2f241a5b3 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -89,7 +89,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 19e0dba305ce..13ba3a89f675 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -121,7 +121,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy); + err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 732f7cae459d..e2c68c30f97d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -229,7 +229,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, NULL); if (err < 0) return err; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5877f6061b57..422414f16b38 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -174,7 +174,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], - basic_policy); + basic_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 80f688436dd7..7ddd08efaa0f 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -478,7 +478,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (tca[TCA_OPTIONS] == NULL) return -EINVAL; - ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy); + ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy, + NULL); if (ret < 0) return ret; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index c1f20077837f..b5e7c1bee6c3 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -99,7 +99,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, new->handle = handle; new->tp = tp; err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], - cgroup_policy); + cgroup_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 3d6b9286c203..008ba7e63b7a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -400,7 +400,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy); + err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL); if (err < 0) return err; @@ -508,9 +508,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, get_random_bytes(&fnew->hashrnd, 4); } - fnew->perturb_timer.function = flow_perturbation; - fnew->perturb_timer.data = (unsigned long)fnew; - init_timer_deferrable(&fnew->perturb_timer); + setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, + (unsigned long)fnew); tcf_exts_change(tp, &fnew->exts, &e); tcf_em_tree_change(tp, &fnew->ematches, &t); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9d0c99d2e9fb..3e7bd7801aa8 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -848,7 +848,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tb) return -ENOBUFS; - err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy); + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], + fl_policy, NULL); if (err < 0) goto errout_tb; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 9dc63d54e167..996209083c6b 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -250,7 +250,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!opt) return handle ? -EINVAL : 0; /* Succeed if it is old method. */ - err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy); + err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 224eb2c14346..0dbcca62aa6a 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -161,8 +161,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (head) return -EEXIST; - err = nla_parse_nested(tb, TCA_MATCHALL_MAX, - tca[TCA_OPTIONS], mall_policy); + err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS], + mall_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 455fc8f83d0a..a371075c1d7a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -489,7 +489,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy); + err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 322438fb3ffc..d7f2923e6ebd 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -484,7 +484,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy); + err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 0751245a6ace..2ab001361457 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -482,7 +482,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, if (!opt) return 0; - err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy); + err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4dbe0c680fe6..9e2f330ac80f 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -860,7 +860,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy); + err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL); if (err < 0) return err; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index ae7e4f5b348b..eb0e9bab54c1 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -912,7 +912,7 @@ static int em_meta_change(struct net *net, void *data, int len, struct tcf_meta_hdr *hdr; struct meta_match *meta = NULL; - err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy); + err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index fbb7ebfc58c6..03b677bc0700 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -314,7 +314,7 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, if (!nla) return 0; - err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy); + err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bcf49cd22786..fcb5ae581c04 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -274,7 +274,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -void qdisc_hash_add(struct Qdisc *q) +void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; @@ -282,6 +282,8 @@ void qdisc_hash_add(struct Qdisc *q) WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); + if (invisible) + q->flags |= TCQ_F_INVISIBLE; } } EXPORT_SYMBOL(qdisc_hash_add); @@ -455,7 +457,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) u16 *tab = NULL; int err; - err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL); if (err < 0) return ERR_PTR(err); if (!tb[TCA_STAB_BASE]) @@ -1003,7 +1005,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, goto err_out4; } - qdisc_hash_add(sch); + qdisc_hash_add(sch, false); return sch; } @@ -1129,7 +1131,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); if (err < 0) return err; @@ -1198,7 +1200,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) replay: /* Reinit, just in case something touches this. */ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); if (err < 0) return err; @@ -1401,9 +1403,14 @@ nla_put_failure: return -1; } -static bool tc_qdisc_dump_ignore(struct Qdisc *q) +static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) { - return (q->flags & TCQ_F_BUILTIN) ? true : false; + if (q->flags & TCQ_F_BUILTIN) + return true; + if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) + return true; + + return false; } static int qdisc_notify(struct net *net, struct sk_buff *oskb, @@ -1417,12 +1424,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (old && !tc_qdisc_dump_ignore(old)) { + if (old && !tc_qdisc_dump_ignore(old, false)) { if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } - if (new && !tc_qdisc_dump_ignore(new)) { + if (new && !tc_qdisc_dump_ignore(new, false)) { if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; @@ -1439,7 +1446,8 @@ err_out: static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, - int *q_idx_p, int s_q_idx, bool recur) + int *q_idx_p, int s_q_idx, bool recur, + bool dump_invisible) { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; @@ -1452,7 +1460,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, if (q_idx < s_q_idx) { q_idx++; } else { - if (!tc_qdisc_dump_ignore(q) && + if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) @@ -1474,7 +1482,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; continue; } - if (!tc_qdisc_dump_ignore(q) && + if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) @@ -1496,12 +1504,21 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; + const struct nlmsghdr *nlh = cb->nlh; + struct tcmsg *tcm = nlmsg_data(nlh); + struct nlattr *tca[TCA_MAX + 1]; + int err; s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; idx = 0; ASSERT_RTNL(); + + err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + if (err < 0) + return err; + for_each_netdev(net, dev) { struct netdev_queue *dev_queue; @@ -1512,13 +1529,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) q_idx = 0; if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, - true) < 0) + true, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx, false) < 0) + &q_idx, s_q_idx, false, + tca[TCA_DUMP_INVISIBLE]) < 0) goto done; cont: @@ -1559,7 +1577,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); if (err < 0) return err; @@ -1762,7 +1780,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, { struct qdisc_dump_args arg; - if (tc_qdisc_dump_ignore(q) || + if (tc_qdisc_dump_ignore(q, false) || *t_p < s_t || !q->ops->cl_ops || (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)) { diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 2209c2ddacbf..40cbceed4de8 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -214,7 +214,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, if (opt == NULL) return -EINVAL; - error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy); + error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy, NULL); if (error < 0) return error; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index d6ca18dc04c3..7415859fd4c3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1137,7 +1137,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) struct tc_ratespec *r; int err; - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy); + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); if (err < 0) return err; @@ -1161,6 +1161,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) sch->handle); if (!q->link.q) q->link.q = &noop_qdisc; + else + qdisc_hash_add(q->link.q, true); q->link.priority = TC_CBQ_MAXPRIO - 1; q->link.priority2 = TC_CBQ_MAXPRIO - 1; @@ -1472,7 +1474,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy); + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); if (err < 0) return err; @@ -1600,6 +1602,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!cl->q) cl->q = &noop_qdisc; + else + qdisc_hash_add(cl->q, true); + cl->common.classid = classid; cl->tparent = parent; cl->qdisc = sch; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 3b86a97bc67c..d00f4c7c2f3a 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -58,7 +58,6 @@ struct choke_sched_data { /* Variables */ struct red_vars vars; - struct tcf_proto __rcu *filter_list; struct { u32 prob_drop; /* Early probability drops */ u32 prob_mark; /* Early probability marks */ @@ -152,11 +151,6 @@ static inline void choke_set_classid(struct sk_buff *skb, u16 classid) choke_skb_cb(skb)->classid = classid; } -static u16 choke_get_classid(const struct sk_buff *skb) -{ - return choke_skb_cb(skb)->classid; -} - /* * Compare flow of two packets * Returns true only if source and destination address and port match. @@ -188,40 +182,6 @@ static bool choke_match_flow(struct sk_buff *skb1, } /* - * Classify flow using either: - * 1. pre-existing classification result in skb - * 2. fast internal classification - * 3. use TC filter based classification - */ -static bool choke_classify(struct sk_buff *skb, - struct Qdisc *sch, int *qerr) - -{ - struct choke_sched_data *q = qdisc_priv(sch); - struct tcf_result res; - struct tcf_proto *fl; - int result; - - fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res, false); - if (result >= 0) { -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - case TC_ACT_SHOT: - return false; - } -#endif - choke_set_classid(skb, TC_H_MIN(res.classid)); - return true; - } - - return false; -} - -/* * Select a packet at random from queue * HACK: since queue can have holes from previous deletion; retry several * times to find a random skb but then just give up and return the head @@ -257,25 +217,15 @@ static bool choke_match_random(const struct choke_sched_data *q, return false; oskb = choke_peek_random(q, pidx); - if (rcu_access_pointer(q->filter_list)) - return choke_get_classid(nskb) == choke_get_classid(oskb); - return choke_match_flow(oskb, nskb); } static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; struct choke_sched_data *q = qdisc_priv(sch); const struct red_parms *p = &q->parms; - if (rcu_access_pointer(q->filter_list)) { - /* If using external classifiers, get result and record it. */ - if (!choke_classify(skb, sch, &ret)) - goto other_drop; /* Packet was eaten by filter */ - } - choke_skb_cb(skb)->keys_valid = 0; /* Compute average queue usage (see RED) */ q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen); @@ -339,12 +289,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, congestion_drop: qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; - -other_drop: - if (ret & __NET_XMIT_BYPASS) - qdisc_qstats_drop(sch); - __qdisc_drop(skb, to_free); - return ret; } static struct sk_buff *choke_dequeue(struct Qdisc *sch) @@ -413,7 +357,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy); + err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL); if (err < 0) return err; @@ -538,7 +482,6 @@ static void choke_destroy(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); choke_free(q->tab); } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 5bfa79ee657c..c518a1efcb9d 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -140,7 +140,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy); + err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index bb4cbdf75004..58a8c32eab23 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -76,7 +76,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy); + err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, NULL); if (err < 0) return err; @@ -117,6 +117,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; + else + qdisc_hash_add(cl->qdisc, true); if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 802ac7c2e5e8..1c0f877f673a 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -129,7 +129,7 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, if (!opt) goto errout; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy); + err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); if (err < 0) goto errout; @@ -201,9 +201,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { + int wlen = skb_network_offset(skb); + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - if (skb_cow_head(skb, sizeof(struct iphdr))) + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) @@ -211,7 +215,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case htons(ETH_P_IPV6): - if (skb_cow_head(skb, sizeof(struct ipv6hdr))) + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) @@ -336,7 +342,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) goto errout; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy); + err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); if (err < 0) goto errout; @@ -368,6 +374,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; + else + qdisc_hash_add(p->q, true); pr_debug("%s: qdisc %p\n", __func__, p->q); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a4f738ac7728..da4f67bda0ee 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -698,7 +698,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy); + err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 9f3a884d1590..18bbb5476c83 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -288,7 +288,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) struct fq_codel_flow *flow; struct list_head *head; u32 prev_drop_count, prev_ecn_mark; - unsigned int prev_backlog; begin: head = &q->new_flows; @@ -307,7 +306,6 @@ begin: prev_drop_count = q->cstats.drop_count; prev_ecn_mark = q->cstats.ecn_mark; - prev_backlog = sch->qstats.backlog; skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams, &flow->cvars, &q->cstats, qdisc_pkt_len, @@ -385,7 +383,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy); + err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy, + NULL); if (err < 0) return err; if (tb[TCA_FQ_CODEL_FLOWS]) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b052b27a984e..52a2c55f6d9e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -794,8 +794,8 @@ static void attach_default_qdiscs(struct net_device *dev) } } #ifdef CONFIG_NET_SCHED - if (dev->qdisc) - qdisc_hash_add(dev->qdisc); + if (dev->qdisc != &noop_qdisc) + qdisc_hash_add(dev->qdisc, false); #endif } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index c78a093c551a..17c7130454bd 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -401,7 +401,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy); + err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); if (err < 0) return err; @@ -470,7 +470,7 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy); + err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ffaa6fb0990..5cb82f6c1b06 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -957,7 +957,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy); + err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL); if (err < 0) return err; @@ -1066,6 +1066,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; + else + qdisc_hash_add(cl->qdisc, true); INIT_LIST_HEAD(&cl->children); cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; @@ -1425,6 +1427,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; + else + qdisc_hash_add(q->root.qdisc, true); INIT_LIST_HEAD(&q->root.children); q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 2fae8b5f1b80..c19d346e6c5a 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -529,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy); + err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 4cd5fb134bc9..570ef3b0c09b 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1017,7 +1017,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); if (err < 0) return err; @@ -1342,7 +1342,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!opt) goto failure; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); if (err < 0) goto failure; @@ -1460,6 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_class_hash_insert(&q->clhash, &cl->common); if (parent) parent->children++; + if (cl->un.leaf.q != &noop_qdisc) + qdisc_hash_add(cl->un.leaf.q, true); } else { if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 20b7f1646f69..cadfdd4f1e52 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -84,7 +84,7 @@ static void mq_attach(struct Qdisc *sch) qdisc_destroy(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc); + qdisc_hash_add(qdisc, false); #endif } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 922683418e53..0a4cf27ea54b 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -21,14 +21,13 @@ struct mqprio_sched { struct Qdisc **qdiscs; - int hw_owned; + int hw_offload; }; static void mqprio_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO}; unsigned int ntx; if (priv->qdiscs) { @@ -39,10 +38,15 @@ static void mqprio_destroy(struct Qdisc *sch) kfree(priv->qdiscs); } - if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) + if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { + struct tc_mqprio_qopt offload = { 0 }; + struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, + { .mqprio = &offload } }; + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); - else + } else { netdev_set_num_tc(dev, 0); + } } static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) @@ -59,15 +63,20 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) return -EINVAL; } - /* net_device does not support requested operation */ - if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) - return -EINVAL; + /* Limit qopt->hw to maximum supported offload value. Drivers have + * the option of overriding this later if they don't support the a + * given offload type. + */ + if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX) + qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX; - /* if hw owned qcount and qoffset are taken from LLD so - * no reason to verify them here + /* If hardware offload is requested we will leave it to the device + * to either populate the queue counts itself or to validate the + * provided queue counts. If ndo_setup_tc is not present then + * hardware doesn't support offload and we should return an error. */ if (qopt->hw) - return 0; + return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL; for (i = 0; i < qopt->num_tc; i++) { unsigned int last = qopt->offset[i] + qopt->count[i]; @@ -139,13 +148,15 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { - struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO, - { .tc = qopt->num_tc }}; + struct tc_mqprio_qopt offload = *qopt; + struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, + { .mqprio = &offload } }; - priv->hw_owned = 1; err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err) return err; + + priv->hw_offload = offload.hw; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -175,7 +186,7 @@ static void mqprio_attach(struct Qdisc *sch) if (old) qdisc_destroy(old); if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc); + qdisc_hash_add(qdisc, false); } kfree(priv->qdiscs); priv->qdiscs = NULL; @@ -243,7 +254,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) opt.num_tc = netdev_get_num_tc(dev); memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); - opt.hw = priv->hw_owned; + opt.hw = priv->hw_offload; for (i = 0; i < netdev_get_num_tc(dev); i++) { opt.count[i] = dev->tc_to_txq[i].count; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index e7839a0d0eaa..43a3a10b3c81 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -217,6 +217,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); old = q->queues[i]; q->queues[i] = child; + if (child != &noop_qdisc) + qdisc_hash_add(child, true); if (old != &noop_qdisc) { qdisc_tree_reduce_backlog(old, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c8bb62a1e744..f0ce4780f395 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -462,7 +462,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* If a delay is expected, orphan the skb. (orphaning usually takes * place at TX completion time, so _before_ the link transit delay) */ - if (q->latency || q->jitter) + if (q->latency || q->jitter || q->rate) skb_orphan_partial(skb); /* @@ -530,21 +530,31 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, now = psched_get_time(); if (q->rate) { - struct sk_buff *last; + struct netem_skb_cb *last = NULL; + + if (sch->q.tail) + last = netem_skb_cb(sch->q.tail); + if (q->t_root.rb_node) { + struct sk_buff *t_skb; + struct netem_skb_cb *t_last; + + t_skb = netem_rb_to_skb(rb_last(&q->t_root)); + t_last = netem_skb_cb(t_skb); + if (!last || + t_last->time_to_send > last->time_to_send) { + last = t_last; + } + } - if (sch->q.qlen) - last = sch->q.tail; - else - last = netem_rb_to_skb(rb_last(&q->t_root)); if (last) { /* * Last packet in queue is reference point (now), * calculate this time bonus and subtract * from delay. */ - delay -= netem_skb_cb(last)->time_to_send - now; + delay -= last->time_to_send - now; delay = max_t(psched_tdiff_t, 0, delay); - now = netem_skb_cb(last)->time_to_send; + now = last->time_to_send; } delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); @@ -833,7 +843,7 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, if (nested_len >= nla_attr_size(0)) return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy); + nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 5c3a99d6aa82..6c2791d6102d 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -190,7 +190,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy); + err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index d4d7db267b6e..92c2e6d448d7 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -192,8 +192,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) qdisc_destroy(child); } - for (i = oldbands; i < q->bands; i++) + for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; + if (q->queues[i] != &noop_qdisc) + qdisc_hash_add(q->queues[i], true); + } sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index f9e712ce2d15..041eba3006cc 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -418,7 +418,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -EINVAL; } - err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy); + err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy, + NULL); if (err < 0) return err; @@ -494,6 +495,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, goto destroy_class; } + if (cl->qdisc != &noop_qdisc) + qdisc_hash_add(cl->qdisc, true); sch_tree_lock(sch); qdisc_class_hash_insert(&q->clhash, &cl->common); sch_tree_unlock(sch); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 249b2a18acbd..11292adce412 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -173,7 +173,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy); + err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL); if (err < 0) return err; @@ -191,6 +191,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) return PTR_ERR(child); } + if (child != &noop_qdisc) + qdisc_hash_add(child, true); sch_tree_lock(sch); q->flags = ctl->flags; q->limit = ctl->limit; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index fe6963d21519..0f777273ba29 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -495,7 +495,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) int err; if (opt) { - err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy); + err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy, NULL); if (err < 0) return -EINVAL; @@ -513,6 +513,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) if (IS_ERR(child)) return PTR_ERR(child); + if (child != &noop_qdisc) + qdisc_hash_add(child, true); sch_tree_lock(sch); qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 42e8c8615e65..b00e02c139de 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -714,9 +714,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) struct sfq_sched_data *q = qdisc_priv(sch); int i; - q->perturb_timer.function = sfq_perturbation; - q->perturb_timer.data = (unsigned long)sch; - init_timer_deferrable(&q->perturb_timer); + setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, + (unsigned long)sch); for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { q->dep[i].next = i + SFQ_MAX_FLOWS; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 303355c449ab..b2e4b6ad241a 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -315,7 +315,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; - err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); + err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL); if (err < 0) return err; @@ -396,6 +396,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; + if (child != &noop_qdisc) + qdisc_hash_add(child, true); } q->limit = qopt->limit; if (tb[TCA_TBF_PBURST]) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 2a6835b4562b..a9708da28eb5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -71,9 +71,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a { struct net *net = sock_net(sk); struct sctp_sock *sp; - int i; sctp_paramhdr_t *p; - int err; + int i; /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); @@ -247,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; + if (sctp_stream_new(asoc, gfp)) + goto fail_init; + /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -264,9 +266,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); - err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp); - if (err) - goto fail_init; + if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp)) + goto stream_free; asoc->active_key_id = ep->active_key_id; asoc->prsctp_enable = ep->prsctp_enable; @@ -289,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; +stream_free: + sctp_stream_free(asoc->stream); fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); @@ -1409,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc, /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ -void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) +void sctp_assoc_sync_pmtu(struct sctp_association *asoc) { struct sctp_transport *t; __u32 pmtu = 0; @@ -1421,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, - SCTP_TRUNC4(dst_mtu(t->dst))); + sctp_transport_update_pmtu( + t, SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index e3621cb4827f..697721a7a3f1 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -306,14 +306,24 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { - if (chunk->sent_count) + struct sctp_stream_out *streamout = + &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + + if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; - else + streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; + } else { chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + } return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { + struct sctp_stream_out *streamout = + &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a28ab20487f..0e06a278d2a9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ - sctp_transport_update_pmtu(sk, t, pmtu); + sctp_transport_update_pmtu(t, pmtu); /* Update association pmtu. */ - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* Retransmit with the new pmtu setting. diff --git a/net/sctp/output.c b/net/sctp/output.c index 71ce6b945dcb..1409a875ad8e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; + struct sock *sk; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - packet->vtag = vtag; - if (asoc && tp->dst) { - struct sock *sk = asoc->base.sk; - - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - - if (sk_can_gso(sk)) { - struct net_device *dev = tp->dst->dev; + /* do the following jobs only once for a flush schedule */ + if (!sctp_packet_empty(packet)) + return; - packet->max_size = dev->gso_max_size; - } else { - packet->max_size = asoc->pathmtu; - } - rcu_read_unlock(); + /* set packet max_size with pathmtu */ + packet->max_size = tp->pathmtu; + if (!asoc) + return; - } else { - packet->max_size = tp->pathmtu; + /* update dst or transport pathmtu if in need */ + sk = asoc->base.sk; + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); + } else if (!sctp_transport_pmtu_check(tp)) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); } - if (ecn_capable && sctp_packet_empty(packet)) { - struct sctp_chunk *chunk; + /* If there a is a prepend chunk stick it on the list before + * any other chunks get appended. + */ + if (ecn_capable) { + struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc); - /* If there a is a prepend chunk stick it on the list before - * any other chunks get appended. - */ - chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } + + if (!tp->dst) + return; + + /* set packet max_size with gso_max_size if gso is enabled*/ + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size + : asoc->pathmtu; + rcu_read_unlock(); } /* Initialize the packet structure. */ @@ -546,7 +556,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; - int confirm; struct dst_entry *dst; struct sk_buff *head; struct sctphdr *sh; @@ -583,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /* update dst if in need */ - if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); - } + /* drop packet if no dst */ dst = dst_clone(tp->dst); if (!dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -625,13 +629,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; - confirm = tp->dst_pending_confirm; - if (confirm) + if (tp->dst_pending_confirm) skb_set_dst_pending_confirm(head, 1); /* neighbour should be confirmed on successful transmission or * positive error */ - if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm) + if (tp->af_specific->sctp_xmit(head, tp) >= 0 && + tp->dst_pending_confirm) tp->dst_pending_confirm = 0; out: @@ -705,7 +709,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, */ if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && - !chunk->msg->force_delay) + !asoc->force_delay) /* Nothing unacked */ return SCTP_XMIT_OK; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index db352e5d61f8..fe4c3d462f6e 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -353,6 +353,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, struct sctp_chunk *chk, *temp; list_for_each_entry_safe(chk, temp, queue, transmitted_list) { + struct sctp_stream_out *streamout; + if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; @@ -361,8 +363,10 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); + streamout = &asoc->stream->out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; + streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; if (!chk->tsn_gap_acked) { if (chk->transport) @@ -382,19 +386,26 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, } static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, - struct sctp_sndrcvinfo *sinfo, - struct list_head *queue, int msg_len) + struct sctp_sndrcvinfo *sinfo, int msg_len) { + struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; - list_for_each_entry_safe(chk, temp, queue, list) { + list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); + q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) { + struct sctp_stream_out *streamout = + &asoc->stream->out[chk->sinfo.sinfo_stream]; + + streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + } msg_len -= SCTP_DATA_SNDSIZE(chk) + sizeof(struct sk_buff) + @@ -431,9 +442,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, return; } - sctp_prsctp_prune_unsent(asoc, sinfo, - &asoc->outqueue.out_chunk_list, - msg_len); + sctp_prsctp_prune_unsent(asoc, sinfo, msg_len); } /* Mark all the eligible packets on a transport for retransmission. */ @@ -1027,8 +1036,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ - if (chunk->sinfo.sinfo_stream >= - asoc->c.sinit_num_ostreams) { + if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) { /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 206377fe91ec..a0b29d43627f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", - assoc->hbinterval, assoc->c.sinit_max_instreams, - assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->hbinterval, assoc->stream->incnt, + assoc->stream->outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, atomic_read(&sk->sk_wmem_alloc), diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 969a30c7bb54..118faff6a332 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * association. */ if (!asoc->temp) { - int error; - - asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams, - asoc->c.sinit_num_ostreams, gfp); - if (!asoc->stream) + if (sctp_stream_init(asoc, gfp)) goto clean_up; - error = sctp_assoc_set_id(asoc, gfp); - if (error) + if (sctp_assoc_set_id(asoc, gfp)) goto clean_up; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e03bb1aab4d0..4f5e6cfc7f60 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3872,9 +3872,18 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net, else if (param.p->type == SCTP_PARAM_RESET_IN_REQUEST) reply = sctp_process_strreset_inreq( (struct sctp_association *)asoc, param, &ev); - /* More handles for other types will be added here, by now it - * just ignores other types. - */ + else if (param.p->type == SCTP_PARAM_RESET_TSN_REQUEST) + reply = sctp_process_strreset_tsnreq( + (struct sctp_association *)asoc, param, &ev); + else if (param.p->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) + reply = sctp_process_strreset_addstrm_out( + (struct sctp_association *)asoc, param, &ev); + else if (param.p->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) + reply = sctp_process_strreset_addstrm_in( + (struct sctp_association *)asoc, param, &ev); + else if (param.p->type == SCTP_PARAM_RESET_RESPONSE) + reply = sctp_process_strreset_resp( + (struct sctp_association *)asoc, param, &ev); if (ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, @@ -3946,7 +3955,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto discard_noforce; } @@ -4017,7 +4026,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto gen_shutdown; } @@ -6353,7 +6362,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ sid = ntohs(data_hdr->stream); - if (sid >= asoc->c.sinit_max_instreams) { + if (sid >= asoc->stream->incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0f378ea2ae38..f16c8d97b7f3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(sk, asoc); + sctp_assoc_pending_pmtu(asoc); /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D @@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } /* Check for invalid stream. */ - if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { + if (sinfo->sinfo_stream >= asoc->stream->outcnt) { err = -EINVAL; goto out_free; } @@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) err = PTR_ERR(datamsg); goto out_free; } - datamsg->force_delay = !!(msg->msg_flags & MSG_MORE); + asoc->force_delay = !!(msg->msg_flags & MSG_MORE); /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { @@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; } else { @@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } } else if (asoc) { asoc->param_flags = @@ -3758,6 +3758,39 @@ out: return retval; } +static int sctp_setsockopt_reconfig_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + asoc->reconf_enable = !!params.assoc_value; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + sp->ep->reconf_enable = !!params.assoc_value; + } else { + goto out; + } + + retval = 0; + +out: + return retval; +} + static int sctp_setsockopt_enable_strreset(struct sock *sk, char __user *optval, unsigned int optlen) @@ -4038,6 +4071,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_DEFAULT_PRINFO: retval = sctp_setsockopt_default_prinfo(sk, optval, optlen); break; + case SCTP_RECONFIG_SUPPORTED: + retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen); + break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_setsockopt_enable_strreset(sk, optval, optlen); break; @@ -4461,8 +4497,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - info->sctpi_instrms = asoc->c.sinit_max_instreams; - info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + info->sctpi_instrms = asoc->stream->incnt; + info->sctpi_outstrms = asoc->stream->outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) @@ -4691,8 +4727,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - status.sstat_instrms = asoc->c.sinit_max_instreams; - status.sstat_outstrms = asoc->c.sinit_num_ostreams; + status.sstat_instrms = asoc->stream->incnt; + status.sstat_outstrms = asoc->stream->outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, @@ -6540,6 +6576,102 @@ out: return retval; } +static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_stream_out *streamout; + struct sctp_association *asoc; + struct sctp_prstatus params; + int retval = -EINVAL; + int policy; + + if (len < sizeof(params)) + goto out; + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) { + retval = -EFAULT; + goto out; + } + + policy = params.sprstat_policy; + if (policy & ~SCTP_PR_SCTP_MASK) + goto out; + + asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); + if (!asoc || params.sprstat_sid >= asoc->stream->outcnt) + goto out; + + streamout = &asoc->stream->out[params.sprstat_sid]; + if (policy == SCTP_PR_SCTP_NONE) { + params.sprstat_abandoned_unsent = 0; + params.sprstat_abandoned_sent = 0; + for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { + params.sprstat_abandoned_unsent += + streamout->abandoned_unsent[policy]; + params.sprstat_abandoned_sent += + streamout->abandoned_sent[policy]; + } + } else { + params.sprstat_abandoned_unsent = + streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; + params.sprstat_abandoned_sent = + streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; + } + + if (put_user(len, optlen) || copy_to_user(optval, ¶ms, len)) { + retval = -EFAULT; + goto out; + } + + retval = 0; + +out: + return retval; +} + +static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + params.assoc_value = asoc->reconf_enable; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + params.assoc_value = sp->ep->reconf_enable; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt_enable_strreset(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -6748,6 +6880,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_pr_assocstatus(sk, len, optval, optlen); break; + case SCTP_PR_STREAM_STATUS: + retval = sctp_getsockopt_pr_streamstatus(sk, len, optval, + optlen); + break; + case SCTP_RECONFIG_SUPPORTED: + retval = sctp_getsockopt_reconfig_supported(sk, len, optval, + optlen); + break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_getsockopt_enable_strreset(sk, len, optval, optlen); @@ -7034,6 +7174,9 @@ int sctp_inet_listen(struct socket *sock, int backlog) if (sock->state != SS_UNCONNECTED) goto out; + if (!sctp_sstate(sk, LISTENING) && !sctp_sstate(sk, CLOSED)) + goto out; + /* If backlog is zero, disable listening. */ if (!backlog) { if (sctp_sstate(sk, CLOSED)) @@ -7437,9 +7580,12 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk->sk_shutdown & RCV_SHUTDOWN) break; - if (sk_can_busy_loop(sk) && - sk_busy_loop(sk, noblock)) - continue; + if (sk_can_busy_loop(sk)) { + sk_busy_loop(sk, noblock); + + if (!skb_queue_empty(&sk->sk_receive_queue)) + continue; + } /* User doesn't want to wait. */ error = -EAGAIN; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 1c6cc04fa3a4..eff6008a32ba 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,33 +35,60 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp) +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp) { struct sctp_stream *stream; int i; stream = kzalloc(sizeof(*stream), gfp); if (!stream) - return NULL; + return -ENOMEM; - stream->outcnt = outcnt; + stream->outcnt = asoc->c.sinit_num_ostreams; stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); if (!stream->out) { kfree(stream); - return NULL; + return -ENOMEM; } for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; - stream->incnt = incnt; + asoc->stream = stream; + + return 0; +} + +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp) +{ + struct sctp_stream *stream = asoc->stream; + int i; + + /* Initial stream->out size may be very big, so free it and alloc + * a new one with new outcnt to save memory. + */ + kfree(stream->out); + stream->outcnt = asoc->c.sinit_num_ostreams; + stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); + if (!stream->out) + goto nomem; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + stream->incnt = asoc->c.sinit_max_instreams; stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp); if (!stream->in) { kfree(stream->out); - kfree(stream); - return NULL; + goto nomem; } - return stream; + return 0; + +nomem: + asoc->stream = NULL; + kfree(stream); + + return -ENOMEM; } void sctp_stream_free(struct sctp_stream *stream) @@ -267,18 +294,6 @@ int sctp_send_add_streams(struct sctp_association *asoc, stream->out = streamout; } - if (in) { - struct sctp_stream_in *streamin; - - streamin = krealloc(stream->in, incnt * sizeof(*streamin), - GFP_KERNEL); - if (!streamin) - goto out; - - memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); - stream->in = streamin; - } - chunk = sctp_make_strreset_addstrm(asoc, out, in); if (!chunk) goto out; @@ -303,13 +318,14 @@ out: } static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param( - struct sctp_association *asoc, __u32 resp_seq) + struct sctp_association *asoc, __u32 resp_seq, + __be16 type) { struct sctp_chunk *chunk = asoc->strreset_chunk; struct sctp_reconf_chunk *hdr; union sctp_params param; - if (ntohl(resp_seq) != asoc->strreset_outseq || !chunk) + if (!chunk) return NULL; hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; @@ -320,7 +336,8 @@ static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param( */ struct sctp_strreset_tsnreq *req = param.v; - if (req->request_seq == resp_seq) + if ((!resp_seq || req->request_seq == resp_seq) && + (!type || type == req->param_hdr.type)) return param.v; } @@ -361,13 +378,9 @@ struct sctp_chunk *sctp_process_strreset_outreq( goto out; if (asoc->strreset_chunk) { - sctp_paramhdr_t *param_hdr; - struct sctp_transport *t; - - param_hdr = sctp_chunk_lookup_strreset_param( - asoc, outreq->response_seq); - if (!param_hdr || param_hdr->type != - SCTP_PARAM_RESET_IN_REQUEST) { + if (!sctp_chunk_lookup_strreset_param( + asoc, outreq->response_seq, + SCTP_PARAM_RESET_IN_REQUEST)) { /* same process with outstanding isn't 0 */ result = SCTP_STRRESET_ERR_IN_PROGRESS; goto out; @@ -377,6 +390,8 @@ struct sctp_chunk *sctp_process_strreset_outreq( asoc->strreset_outseq++; if (!asoc->strreset_outstanding) { + struct sctp_transport *t; + t = asoc->strreset_chunk->transport; if (del_timer(&t->reconf_timer)) sctp_transport_put(t); @@ -477,3 +492,367 @@ out: return chunk; } + +struct sctp_chunk *sctp_process_strreset_tsnreq( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen; + struct sctp_strreset_tsnreq *tsnreq = param.v; + struct sctp_stream *stream = asoc->stream; + __u32 result = SCTP_STRRESET_DENIED; + __u32 request_seq; + __u16 i; + + request_seq = ntohl(tsnreq->request_seq); + if (request_seq > asoc->strreset_inseq) { + result = SCTP_STRRESET_ERR_BAD_SEQNO; + goto out; + } else if (request_seq == asoc->strreset_inseq) { + asoc->strreset_inseq++; + } + + if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) + goto out; + + if (asoc->strreset_outstanding) { + result = SCTP_STRRESET_ERR_IN_PROGRESS; + goto out; + } + + /* G3: The same processing as though a SACK chunk with no gap report + * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were + * received MUST be performed. + */ + max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); + sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); + sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); + + /* G1: Compute an appropriate value for the Receiver's Next TSN -- the + * TSN that the peer should use to send the next DATA chunk. The + * value SHOULD be the smallest TSN not acknowledged by the + * receiver of the request plus 2^31. + */ + init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31); + sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + init_tsn, GFP_ATOMIC); + + /* G4: The same processing as though a FWD-TSN chunk (as defined in + * [RFC3758]) with all streams affected and a new cumulative TSN + * ACK of the Receiver's Next TSN minus 1 were received MUST be + * performed. + */ + sctp_outq_free(&asoc->outqueue); + + /* G2: Compute an appropriate value for the local endpoint's next TSN, + * i.e., the next TSN assigned by the receiver of the SSN/TSN reset + * chunk. The value SHOULD be the highest TSN sent by the receiver + * of the request plus 1. + */ + next_tsn = asoc->next_tsn; + asoc->ctsn_ack_point = next_tsn - 1; + asoc->adv_peer_ack_point = asoc->ctsn_ack_point; + + /* G5: The next expected and outgoing SSNs MUST be reset to 0 for all + * incoming and outgoing streams. + */ + for (i = 0; i < stream->outcnt; i++) + stream->out[i].ssn = 0; + for (i = 0; i < stream->incnt; i++) + stream->in[i].ssn = 0; + + result = SCTP_STRRESET_PERFORMED; + + *evp = sctp_ulpevent_make_assoc_reset_event(asoc, 0, init_tsn, + next_tsn, GFP_ATOMIC); + +out: + return sctp_make_strreset_tsnresp(asoc, result, request_seq, + next_tsn, init_tsn); +} + +struct sctp_chunk *sctp_process_strreset_addstrm_out( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + struct sctp_strreset_addstrm *addstrm = param.v; + struct sctp_stream *stream = asoc->stream; + __u32 result = SCTP_STRRESET_DENIED; + struct sctp_stream_in *streamin; + __u32 request_seq, incnt; + __u16 in; + + request_seq = ntohl(addstrm->request_seq); + if (request_seq > asoc->strreset_inseq) { + result = SCTP_STRRESET_ERR_BAD_SEQNO; + goto out; + } else if (request_seq == asoc->strreset_inseq) { + asoc->strreset_inseq++; + } + + if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) + goto out; + + if (asoc->strreset_chunk) { + if (!sctp_chunk_lookup_strreset_param( + asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) { + /* same process with outstanding isn't 0 */ + result = SCTP_STRRESET_ERR_IN_PROGRESS; + goto out; + } + + asoc->strreset_outstanding--; + asoc->strreset_outseq++; + + if (!asoc->strreset_outstanding) { + struct sctp_transport *t; + + t = asoc->strreset_chunk->transport; + if (del_timer(&t->reconf_timer)) + sctp_transport_put(t); + + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + } + } + + in = ntohs(addstrm->number_of_streams); + incnt = stream->incnt + in; + if (!in || incnt > SCTP_MAX_STREAM) + goto out; + + streamin = krealloc(stream->in, incnt * sizeof(*streamin), + GFP_ATOMIC); + if (!streamin) + goto out; + + memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); + stream->in = streamin; + stream->incnt = incnt; + + result = SCTP_STRRESET_PERFORMED; + + *evp = sctp_ulpevent_make_stream_change_event(asoc, + 0, ntohs(addstrm->number_of_streams), 0, GFP_ATOMIC); + +out: + return sctp_make_strreset_resp(asoc, result, request_seq); +} + +struct sctp_chunk *sctp_process_strreset_addstrm_in( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + struct sctp_strreset_addstrm *addstrm = param.v; + struct sctp_stream *stream = asoc->stream; + __u32 result = SCTP_STRRESET_DENIED; + struct sctp_stream_out *streamout; + struct sctp_chunk *chunk = NULL; + __u32 request_seq, outcnt; + __u16 out; + + request_seq = ntohl(addstrm->request_seq); + if (request_seq > asoc->strreset_inseq) { + result = SCTP_STRRESET_ERR_BAD_SEQNO; + goto out; + } else if (request_seq == asoc->strreset_inseq) { + asoc->strreset_inseq++; + } + + if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) + goto out; + + if (asoc->strreset_outstanding) { + result = SCTP_STRRESET_ERR_IN_PROGRESS; + goto out; + } + + out = ntohs(addstrm->number_of_streams); + outcnt = stream->outcnt + out; + if (!out || outcnt > SCTP_MAX_STREAM) + goto out; + + streamout = krealloc(stream->out, outcnt * sizeof(*streamout), + GFP_ATOMIC); + if (!streamout) + goto out; + + memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); + stream->out = streamout; + + chunk = sctp_make_strreset_addstrm(asoc, out, 0); + if (!chunk) + goto out; + + asoc->strreset_chunk = chunk; + asoc->strreset_outstanding = 1; + sctp_chunk_hold(asoc->strreset_chunk); + + stream->outcnt = outcnt; + + *evp = sctp_ulpevent_make_stream_change_event(asoc, + 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC); + +out: + if (!chunk) + chunk = sctp_make_strreset_resp(asoc, result, request_seq); + + return chunk; +} + +struct sctp_chunk *sctp_process_strreset_resp( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + struct sctp_strreset_resp *resp = param.v; + struct sctp_stream *stream = asoc->stream; + struct sctp_transport *t; + __u16 i, nums, flags = 0; + sctp_paramhdr_t *req; + __u32 result; + + req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0); + if (!req) + return NULL; + + result = ntohl(resp->result); + if (result != SCTP_STRRESET_PERFORMED) { + /* if in progress, do nothing but retransmit */ + if (result == SCTP_STRRESET_IN_PROGRESS) + return NULL; + else if (result == SCTP_STRRESET_DENIED) + flags = SCTP_STREAM_RESET_DENIED; + else + flags = SCTP_STREAM_RESET_FAILED; + } + + if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) { + struct sctp_strreset_outreq *outreq; + __u16 *str_p = NULL; + + outreq = (struct sctp_strreset_outreq *)req; + nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2; + + if (result == SCTP_STRRESET_PERFORMED) { + if (nums) { + str_p = outreq->list_of_streams; + for (i = 0; i < nums; i++) + stream->out[ntohs(str_p[i])].ssn = 0; + } else { + for (i = 0; i < stream->outcnt; i++) + stream->out[i].ssn = 0; + } + + flags = SCTP_STREAM_RESET_OUTGOING_SSN; + } + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, + nums, str_p, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) { + struct sctp_strreset_inreq *inreq; + __u16 *str_p = NULL; + + /* if the result is performed, it's impossible for inreq */ + if (result == SCTP_STRRESET_PERFORMED) + return NULL; + + inreq = (struct sctp_strreset_inreq *)req; + nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; + + str_p = inreq->list_of_streams; + *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, + nums, str_p, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) { + struct sctp_strreset_resptsn *resptsn; + __u32 stsn, rtsn; + + /* check for resptsn, as sctp_verify_reconf didn't do it*/ + if (ntohs(param.p->length) != sizeof(*resptsn)) + return NULL; + + resptsn = (struct sctp_strreset_resptsn *)resp; + stsn = ntohl(resptsn->senders_next_tsn); + rtsn = ntohl(resptsn->receivers_next_tsn); + + if (result == SCTP_STRRESET_PERFORMED) { + __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( + &asoc->peer.tsn_map); + + sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); + sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); + + sctp_tsnmap_init(&asoc->peer.tsn_map, + SCTP_TSN_MAP_INITIAL, + stsn, GFP_ATOMIC); + + sctp_outq_free(&asoc->outqueue); + + asoc->next_tsn = rtsn; + asoc->ctsn_ack_point = asoc->next_tsn - 1; + asoc->adv_peer_ack_point = asoc->ctsn_ack_point; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].ssn = 0; + for (i = 0; i < stream->incnt; i++) + stream->in[i].ssn = 0; + } + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags, + stsn, rtsn, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) { + struct sctp_strreset_addstrm *addstrm; + __u16 number; + + addstrm = (struct sctp_strreset_addstrm *)req; + nums = ntohs(addstrm->number_of_streams); + number = stream->outcnt - nums; + + if (result == SCTP_STRRESET_PERFORMED) + for (i = number; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + else + stream->outcnt = number; + + *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, + 0, nums, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) { + struct sctp_strreset_addstrm *addstrm; + + /* if the result is performed, it's impossible for addstrm in + * request. + */ + if (result == SCTP_STRRESET_PERFORMED) + return NULL; + + addstrm = (struct sctp_strreset_addstrm *)req; + nums = ntohs(addstrm->number_of_streams); + + *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, + nums, 0, GFP_ATOMIC); + } + + asoc->strreset_outstanding--; + asoc->strreset_outseq++; + + /* remove everything for this reconf request */ + if (!asoc->strreset_outstanding) { + t = asoc->strreset_chunk->transport; + if (del_timer(&t->reconf_timer)) + sctp_transport_put(t); + + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + } + + return NULL; +} diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index daf8554fd42a..0e732f68c2bf 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -275,6 +275,13 @@ static struct ctl_table sctp_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "reconf_enable", + .data = &init_net.sctp.reconf_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "auth_enable", .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3379668af368..721eeebfcd8a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu) +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { - struct dst_entry *dst; + struct dst_entry *dst = sctp_transport_dst_check(t); if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ @@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->pathmtu = pmtu; } - dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); - if (dst) { - dst->ops->update_pmtu(dst, sk, NULL, pmtu); - + dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); } + + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); } /* Caches the dst entry and source address for a transport's destination diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index c8881bc542a0..ec2b3e013c2f 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -883,6 +883,62 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( return event; } +struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( + const struct sctp_association *asoc, __u16 flags, __u32 local_tsn, + __u32 remote_tsn, gfp_t gfp) +{ + struct sctp_assoc_reset_event *areset; + struct sctp_ulpevent *event; + struct sk_buff *skb; + + event = sctp_ulpevent_new(sizeof(struct sctp_assoc_reset_event), + MSG_NOTIFICATION, gfp); + if (!event) + return NULL; + + skb = sctp_event2skb(event); + areset = (struct sctp_assoc_reset_event *) + skb_put(skb, sizeof(struct sctp_assoc_reset_event)); + + areset->assocreset_type = SCTP_ASSOC_RESET_EVENT; + areset->assocreset_flags = flags; + areset->assocreset_length = sizeof(struct sctp_assoc_reset_event); + sctp_ulpevent_set_owner(event, asoc); + areset->assocreset_assoc_id = sctp_assoc2id(asoc); + areset->assocreset_local_tsn = local_tsn; + areset->assocreset_remote_tsn = remote_tsn; + + return event; +} + +struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( + const struct sctp_association *asoc, __u16 flags, + __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp) +{ + struct sctp_stream_change_event *schange; + struct sctp_ulpevent *event; + struct sk_buff *skb; + + event = sctp_ulpevent_new(sizeof(struct sctp_stream_change_event), + MSG_NOTIFICATION, gfp); + if (!event) + return NULL; + + skb = sctp_event2skb(event); + schange = (struct sctp_stream_change_event *) + skb_put(skb, sizeof(struct sctp_stream_change_event)); + + schange->strchange_type = SCTP_STREAM_CHANGE_EVENT; + schange->strchange_flags = flags; + schange->strchange_length = sizeof(struct sctp_stream_change_event); + sctp_ulpevent_set_owner(event, asoc); + schange->strchange_assoc_id = sctp_assoc2id(asoc); + schange->strchange_instrms = strchange_instrms; + schange->strchange_outstrms = strchange_outstrms; + + return event; +} + /* Return the notification type, assuming this is a notification * event. */ diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 093803786eac..5b6ee21368a6 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -147,7 +147,6 @@ static int smc_release(struct socket *sock) schedule_delayed_work(&smc->sock_put_work, SMC_CLOSE_SOCK_PUT_DELAY); } - sk->sk_prot->unhash(sk); release_sock(sk); sock_put(sk); @@ -451,6 +450,9 @@ static int smc_connect_rdma(struct smc_sock *smc) goto decline_rdma_unlock; } + smc_close_init(smc); + smc_rx_init(smc); + if (local_contact == SMC_FIRST_CONTACT) { rc = smc_ib_ready_link(link); if (rc) { @@ -477,7 +479,6 @@ static int smc_connect_rdma(struct smc_sock *smc) mutex_unlock(&smc_create_lgr_pending); smc_tx_init(smc); - smc_rx_init(smc); out_connected: smc_copy_sock_settings_to_clc(smc); @@ -637,7 +638,8 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { - /* tbd in follow-on patch: close this sock */ + new_sk->sk_prot->unhash(new_sk); + sock_put(new_sk); continue; } if (new_sock) @@ -657,8 +659,13 @@ void smc_close_non_accepted(struct sock *sk) if (!sk->sk_lingertime) /* wait for peer closing */ sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; - if (!smc->use_fallback) + if (smc->use_fallback) { + sk->sk_state = SMC_CLOSED; + } else { smc_close_active(smc); + sock_set_flag(sk, SOCK_DEAD); + sk->sk_shutdown |= SHUTDOWN_MASK; + } if (smc->clcsock) { struct socket *tcp; @@ -666,11 +673,9 @@ void smc_close_non_accepted(struct sock *sk) smc->clcsock = NULL; sock_release(tcp); } - sock_set_flag(sk, SOCK_DEAD); - sk->sk_shutdown |= SHUTDOWN_MASK; if (smc->use_fallback) { schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else { + } else if (sk->sk_state == SMC_CLOSED) { smc_conn_free(&smc->conn); schedule_delayed_work(&smc->sock_put_work, SMC_CLOSE_SOCK_PUT_DELAY); @@ -800,6 +805,9 @@ static void smc_listen_work(struct work_struct *work) goto decline_rdma; } + smc_close_init(new_smc); + smc_rx_init(new_smc); + rc = smc_clc_send_accept(new_smc, local_contact); if (rc) goto out_err; @@ -839,7 +847,6 @@ static void smc_listen_work(struct work_struct *work) } smc_tx_init(new_smc); - smc_rx_init(new_smc); out_connected: sk_refcnt_debug_inc(newsmcsk); diff --git a/net/smc/smc.h b/net/smc/smc.h index ee5fbea24549..6e44313e4467 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -164,6 +164,7 @@ struct smc_connection { #ifndef KERNEL_HAS_ATOMIC64 spinlock_t acurs_lock; /* protect cursors */ #endif + struct work_struct close_work; /* peer sent some closing */ }; struct smc_sock { /* smc sock container */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 5a339493872e..a7294edbc221 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -217,8 +217,13 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->sk.sk_err = ECONNRESET; conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; } - if (smc_cdc_rxed_any_close_or_senddone(conn)) - smc_close_passive_received(smc); + if (smc_cdc_rxed_any_close_or_senddone(conn)) { + smc->sk.sk_shutdown |= RCV_SHUTDOWN; + if (smc->clcsock && smc->clcsock->sk) + smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; + sock_set_flag(&smc->sk, SOCK_DONE); + schedule_work(&conn->close_work); + } /* piggy backed tx info */ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ @@ -228,8 +233,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc_close_wake_tx_prepared(smc); } - /* subsequent patch: trigger socket release if connection closed */ - /* socket connected but not accepted */ if (!smc->sk.sk_socket) return; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 67a71d170bed..3c2e166b5d22 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -117,7 +117,6 @@ void smc_close_active_abort(struct smc_sock *smc) struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - bh_lock_sock(&smc->sk); smc->sk.sk_err = ECONNABORTED; if (smc->clcsock && smc->clcsock->sk) { smc->clcsock->sk->sk_err = ECONNABORTED; @@ -125,6 +124,7 @@ void smc_close_active_abort(struct smc_sock *smc) } switch (smc->sk.sk_state) { case SMC_INIT: + case SMC_ACTIVE: smc->sk.sk_state = SMC_PEERABORTWAIT; break; case SMC_APPCLOSEWAIT1: @@ -161,10 +161,15 @@ void smc_close_active_abort(struct smc_sock *smc) } sock_set_flag(&smc->sk, SOCK_DEAD); - bh_unlock_sock(&smc->sk); smc->sk.sk_state_change(&smc->sk); } +static inline bool smc_close_sent_any_close(struct smc_connection *conn) +{ + return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || + conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; +} + int smc_close_active(struct smc_sock *smc) { struct smc_cdc_conn_state_flags *txflags = @@ -185,8 +190,7 @@ again: case SMC_INIT: sk->sk_state = SMC_CLOSED; if (smc->smc_listen_work.func) - flush_work(&smc->smc_listen_work); - sock_put(sk); + cancel_work_sync(&smc->smc_listen_work); break; case SMC_LISTEN: sk->sk_state = SMC_CLOSED; @@ -198,7 +202,7 @@ again: } release_sock(sk); smc_close_cleanup_listen(sk); - flush_work(&smc->tcp_listen_work); + cancel_work_sync(&smc->smc_listen_work); lock_sock(sk); break; case SMC_ACTIVE: @@ -218,7 +222,7 @@ again: case SMC_APPFINCLOSEWAIT: /* socket already shutdown wr or both (active close) */ if (txflags->peer_done_writing && - !txflags->peer_conn_closed) { + !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); } @@ -248,6 +252,13 @@ again: break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: + if (txflags->peer_done_writing && + !smc_close_sent_any_close(conn)) { + /* just shutdown wr done, send close request */ + rc = smc_close_final(conn); + } + /* peer sending PeerConnectionClosed will cause transition */ + break; case SMC_PEERFINCLOSEWAIT: /* peer sending PeerConnectionClosed will cause transition */ break; @@ -285,7 +296,7 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (txflags->peer_done_writing && - !txflags->peer_conn_closed) { + !smc_close_sent_any_close(&smc->conn)) { /* just shutdown, but not yet closed locally */ smc_close_abort(&smc->conn); sk->sk_state = SMC_PROCESSABORT; @@ -306,22 +317,27 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) /* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, * or peer_done_writing. - * Called under tasklet context. */ -void smc_close_passive_received(struct smc_sock *smc) +static void smc_close_passive_work(struct work_struct *work) { - struct smc_cdc_conn_state_flags *rxflags = - &smc->conn.local_rx_ctrl.conn_state_flags; + struct smc_connection *conn = container_of(work, + struct smc_connection, + close_work); + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + struct smc_cdc_conn_state_flags *rxflags; struct sock *sk = &smc->sk; int old_state; - sk->sk_shutdown |= RCV_SHUTDOWN; - if (smc->clcsock && smc->clcsock->sk) - smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; - sock_set_flag(&smc->sk, SOCK_DONE); - + lock_sock(&smc->sk); old_state = sk->sk_state; + if (!conn->alert_token_local) { + /* abnormal termination */ + smc_close_active_abort(smc); + goto wakeup; + } + + rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { smc_close_passive_abort_received(smc); goto wakeup; @@ -331,7 +347,7 @@ void smc_close_passive_received(struct smc_sock *smc) case SMC_INIT: if (atomic_read(&smc->conn.bytes_to_rcv) || (rxflags->peer_done_writing && - !rxflags->peer_conn_closed)) + !smc_cdc_rxed_any_close(conn))) sk->sk_state = SMC_APPCLOSEWAIT1; else sk->sk_state = SMC_CLOSED; @@ -348,7 +364,7 @@ void smc_close_passive_received(struct smc_sock *smc) if (!smc_cdc_rxed_any_close(&smc->conn)) break; if (sock_flag(sk, SOCK_DEAD) && - (sk->sk_shutdown == SHUTDOWN_MASK)) { + smc_close_sent_any_close(conn)) { /* smc_release has already been called locally */ sk->sk_state = SMC_CLOSED; } else { @@ -367,17 +383,19 @@ void smc_close_passive_received(struct smc_sock *smc) } wakeup: - if (old_state != sk->sk_state) - sk->sk_state_change(sk); sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ - if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) { - smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); + if (old_state != sk->sk_state) { + sk->sk_state_change(sk); + if ((sk->sk_state == SMC_CLOSED) && + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { + smc_conn_free(&smc->conn); + schedule_delayed_work(&smc->sock_put_work, + SMC_CLOSE_SOCK_PUT_DELAY); + } } + release_sock(&smc->sk); } void smc_close_sock_put_work(struct work_struct *work) @@ -442,3 +460,9 @@ again: sk->sk_state_change(&smc->sk); return rc; } + +/* Initialize close properties on connection establishment. */ +void smc_close_init(struct smc_sock *smc) +{ + INIT_WORK(&smc->conn.close_work, smc_close_passive_work); +} diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index bc9a2df3633c..4a3d99a8d7cb 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -21,8 +21,8 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); void smc_close_active_abort(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); -void smc_close_passive_received(struct smc_sock *smc); void smc_close_sock_put_work(struct work_struct *work); int smc_close_shutdown_write(struct smc_sock *smc); +void smc_close_init(struct smc_sock *smc); #endif /* SMC_CLOSE_H */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 0eac633fb354..65020e93ff21 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -316,7 +316,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) smc = container_of(conn, struct smc_sock, conn); sock_hold(&smc->sk); __smc_lgr_unregister_conn(conn); - smc_close_active_abort(smc); + schedule_work(&conn->close_work); sock_put(&smc->sk); node = rb_first(&lgr->conns_all); } diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e6743c008ac5..16b7c801f8b6 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -179,8 +179,6 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, u8 port_idx; smcibdev = container_of(handler, struct smc_ib_device, event_handler); - if (!smc_pnet_find_ib(smcibdev->ibdev->name)) - return; switch (ibevent->event) { case IB_EVENT_PORT_ERR: @@ -259,7 +257,6 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = 1, - .max_inline_data = SMC_WR_TX_SIZE, }, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_RC, diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index a95f74bb5569..7e1f0e24d177 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -11,6 +11,7 @@ #ifndef _SMC_IB_H #define _SMC_IB_H +#include <linux/interrupt.h> #include <linux/if_ether.h> #include <rdma/ib_verbs.h> diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 9d3e7fb8348d..78f7af28ae4f 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -219,7 +219,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) } /* Find an infiniband device by a given name. The device might not exist. */ -struct smc_ib_device *smc_pnet_find_ib(char *ib_name) +static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) { struct smc_ib_device *ibdev; @@ -523,8 +523,11 @@ void smc_pnet_find_roce_resource(struct sock *sk, read_lock(&smc_pnettable.lock); list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { if (dst->dev == pnetelem->ndev) { - *smcibdev = pnetelem->smcibdev; - *ibport = pnetelem->ib_port; + if (smc_ib_port_active(pnetelem->smcibdev, + pnetelem->ib_port)) { + *smcibdev = pnetelem->smcibdev; + *ibport = pnetelem->ib_port; + } break; } } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 32ab3df928ca..c4f1bccd4358 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -16,7 +16,6 @@ struct smc_ib_device; int smc_pnet_init(void) __init; void smc_pnet_exit(void); int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); -struct smc_ib_device *smc_pnet_find_ib(char *ib_name); void smc_pnet_find_roce_resource(struct sock *sk, struct smc_ib_device **smcibdev, u8 *ibport); diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index c4ef9a4ec569..f0c8b089f770 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -36,11 +36,10 @@ static void smc_rx_data_ready(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); - else - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 69a0013dd25c..21ec1832ab51 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -431,9 +431,13 @@ static void smc_tx_work(struct work_struct *work) struct smc_connection, tx_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + int rc; lock_sock(&smc->sk); - smc_tx_sndbuf_nonempty(conn); + rc = smc_tx_sndbuf_nonempty(conn); + if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && + !atomic_read(&conn->bytes_to_rcv)) + conn->local_rx_ctrl.prod_flags.write_blocked = 0; release_sock(&smc->sk); } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index eadf157418dc..874ee9f9d796 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -447,7 +447,7 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_ibs[i].num_sge = 1; lnk->wr_tx_ibs[i].opcode = IB_WR_SEND; lnk->wr_tx_ibs[i].send_flags = - IB_SEND_SIGNALED | IB_SEND_SOLICITED | IB_SEND_INLINE; + IB_SEND_SIGNALED | IB_SEND_SOLICITED; } for (i = 0; i < lnk->wr_rx_cnt; i++) { lnk->wr_rx_sges[i].addr = diff --git a/net/socket.c b/net/socket.c index e034fe4164be..eea997036ada 100644 --- a/net/socket.c +++ b/net/socket.c @@ -652,6 +652,16 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +static bool skb_is_err_queue(const struct sk_buff *skb) +{ + /* pkt_type of skbs enqueued on the error queue are set to + * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do + * in recvmsg, since skbs received on a local socket will never + * have a pkt_type of PACKET_OUTGOING. + */ + return skb->pkt_type == PACKET_OUTGOING; +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@ -695,7 +705,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(tss), &tss); - if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) + if (skb_is_err_queue(skb) && skb->len && + SKB_EXT_ERR(skb)->opt_stats) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, skb->len, skb->data); } @@ -3345,3 +3356,49 @@ int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) return sock->ops->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); + +/* This routine returns the IP overhead imposed by a socket i.e. + * the length of the underlying IP header, depending on whether + * this is an IPv4 or IPv6 socket and the length from IP options turned + * on at the socket. + */ +u32 kernel_sock_ip_overhead(struct sock *sk) +{ + struct inet_sock *inet; + struct ip_options_rcu *opt; + u32 overhead = 0; + bool owned_by_user; +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6_pinfo *np; + struct ipv6_txoptions *optv6 = NULL; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + + if (!sk) + return overhead; + + owned_by_user = sock_owned_by_user(sk); + switch (sk->sk_family) { + case AF_INET: + inet = inet_sk(sk); + overhead += sizeof(struct iphdr); + opt = rcu_dereference_protected(inet->inet_opt, + owned_by_user); + if (opt) + overhead += opt->opt.optlen; + return overhead; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + np = inet6_sk(sk); + overhead += sizeof(struct ipv6hdr); + if (np) + optv6 = rcu_dereference_protected(np->opt, + owned_by_user); + if (optv6) + overhead += (optv6->opt_flen + optv6->opt_nflen); + return overhead; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */ + return overhead; + } +} +EXPORT_SYMBOL(kernel_sock_ip_overhead); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8931e33b6541..2b720fa35c4f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1635,6 +1635,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, xprt = &svsk->sk_xprt; svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); serv->sv_bc_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c13a5c35ce14..fc8f14c7bfec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -127,6 +127,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, xprt = &cma_xprt->sc_xprt; svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &xprt->xpt_flags); serv->sv_bc_xprt = xprt; dprintk("svcrdma: %s(%p)\n", __func__, xprt); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 017801f9dbaa..8d40a7d31c99 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -826,7 +826,7 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev, int err; err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, - switchdev_port_bridge_policy); + switchdev_port_bridge_policy, NULL); if (err) return err; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 33a5bdfbef76..d174ee3254ee 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -802,7 +802,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -851,7 +851,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -891,7 +891,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -939,7 +939,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -982,7 +982,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -1104,7 +1104,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy); + tipc_nl_media_policy, info->extack); if (err) return err; @@ -1152,7 +1152,7 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy); + tipc_nl_media_policy, info->extack); if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; diff --git a/net/tipc/link.c b/net/tipc/link.c index ddd2dd6f77aa..60820dc35a08 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1827,7 +1827,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) int err; err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop, - tipc_nl_prop_policy); + tipc_nl_prop_policy, NULL); if (err) return err; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9be6592e4a6f..bd0aac87b41a 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + tipc_subscrp_get(s); list_add(&s->nameseq_list, &nseq->subscriptions); if (!sseq) @@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); + tipc_subscrp_put(s); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); diff --git a/net/tipc/net.c b/net/tipc/net.c index ab8a2d5d1e32..719c5924b638 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -211,8 +211,8 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], - tipc_nl_net_policy); + info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, + info->extack); if (err) return err; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 26ca8dd64ded..b76f13f6fea1 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -268,7 +268,8 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) if (!*attr) return -EOPNOTSUPP; - return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy); + return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy, + NULL); } int __init tipc_netlink_start(void) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index e1ae8a8a2b8e..9bfe886ab330 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -296,7 +296,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, err = nla_parse(attrbuf, tipc_genl_family.maxattr, (const struct nlattr *)trans_buf->data, - trans_buf->len, NULL); + trans_buf->len, NULL, NULL); if (err) goto parse_out; @@ -352,7 +352,7 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, - attrs[TIPC_NLA_BEARER], NULL); + attrs[TIPC_NLA_BEARER], NULL, NULL); if (err) return err; @@ -472,7 +472,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL); + NULL, NULL); if (err) return err; @@ -480,7 +480,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, - link[TIPC_NLA_LINK_PROP], NULL); + link[TIPC_NLA_LINK_PROP], NULL, NULL); if (err) return err; @@ -488,7 +488,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, - link[TIPC_NLA_LINK_STATS], NULL); + link[TIPC_NLA_LINK_STATS], NULL, NULL); if (err) return err; @@ -598,7 +598,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL); + NULL, NULL); if (err) return err; @@ -795,7 +795,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, - attrs[TIPC_NLA_NAME_TABLE], NULL); + attrs[TIPC_NLA_NAME_TABLE], NULL, NULL); if (err) return err; @@ -803,7 +803,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, - nt[TIPC_NLA_NAME_TABLE_PUBL], NULL); + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL); if (err) return err; @@ -863,7 +863,7 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], - NULL); + NULL, NULL); if (err) return err; @@ -929,7 +929,7 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], - NULL); + NULL, NULL); if (err) return err; @@ -940,8 +940,8 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 node; struct nlattr *con[TIPC_NLA_CON_MAX + 1]; - nla_parse_nested(con, TIPC_NLA_CON_MAX, sock[TIPC_NLA_SOCK_CON], - NULL); + nla_parse_nested(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], NULL, NULL); node = nla_get_u32(con[TIPC_NLA_CON_NODE]); tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", @@ -977,8 +977,8 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], - NULL); + err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, + attrs[TIPC_NLA_MEDIA], NULL, NULL); if (err) return err; @@ -998,7 +998,7 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], - NULL); + NULL, NULL); if (err) return err; @@ -1045,7 +1045,7 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], - NULL); + NULL, NULL); if (err) return err; diff --git a/net/tipc/node.c b/net/tipc/node.c index 4512e83652b1..01b1f077603e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1607,8 +1607,8 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) return -EINVAL; err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], - tipc_nl_net_policy); + info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, + info->extack); if (err) return err; @@ -1774,7 +1774,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); + tipc_nl_link_policy, info->extack); if (err) return err; @@ -1902,7 +1902,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2042,7 +2042,7 @@ int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX, info->attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy); + tipc_nl_monitor_policy, info->extack); if (err) return err; @@ -2163,7 +2163,7 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, err = nla_parse_nested(mon, TIPC_NLA_MON_MAX, attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy); + tipc_nl_monitor_policy, NULL); if (err) return err; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7130e73bd42c..740100abeec3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2511,6 +2511,28 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } } +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + u32 onode = tipc_own_addr(sock_net(sock1->sk)); + + tsk1->peer.family = AF_TIPC; + tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.scope = TIPC_NODE_SCOPE; + tsk1->peer.addr.id.ref = tsk2->portid; + tsk1->peer.addr.id.node = onode; + tsk2->peer.family = AF_TIPC; + tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.scope = TIPC_NODE_SCOPE; + tsk2->peer.addr.id.ref = tsk1->portid; + tsk2->peer.addr.id.node = onode; + + tipc_sk_finish_conn(tsk1, tsk2->portid, onode); + tipc_sk_finish_conn(tsk2, tsk1->portid, onode); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -2519,7 +2541,7 @@ static const struct proto_ops msg_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname = tipc_getname, .poll = tipc_poll, @@ -2540,7 +2562,7 @@ static const struct proto_ops packet_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, @@ -2561,7 +2583,7 @@ static const struct proto_ops stream_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, @@ -2844,7 +2866,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], - tipc_nl_sock_policy); + tipc_nl_sock_policy, NULL); if (err) return err; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d0894..0bf91cd3733c 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,8 +54,6 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); -static void tipc_subscrp_put(struct tipc_subscription *subscription); -static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; - tipc_subscrp_get(sub); tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; @@ -135,12 +132,17 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); - tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -172,21 +174,17 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; - spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); - spin_unlock_bh(&subscriber->lock); kfree(sub); tipc_subscrb_put(subscriber); } -static void tipc_subscrp_put(struct tipc_subscription *subscription) +void tipc_subscrp_put(struct tipc_subscription *subscription) { kref_put(&subscription->kref, tipc_subscrp_kref_release); } -static void tipc_subscrp_get(struct tipc_subscription *subscription) +void tipc_subscrp_get(struct tipc_subscription *subscription) { kref_get(&subscription->kref); } @@ -205,11 +203,9 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; - tipc_subscrp_get(sub); - spin_unlock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); tipc_subscrp_delete(sub); - tipc_subscrp_put(sub); - spin_lock_bh(&subscriber->lock); if (s) break; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ffdc214c117a..ee52957dc952 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); +void tipc_subscrp_put(struct tipc_subscription *subscription); +void tipc_subscrp_get(struct tipc_subscription *subscription); + #endif diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 46061cf48cd1..ecca64fc6a6f 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -457,7 +457,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, NULL); if (err) return err; @@ -609,7 +609,8 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; struct udp_media_addr *dst; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy)) + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, + tipc_nl_udp_policy, NULL)) return -EINVAL; if (!opts[TIPC_NLA_UDP_REMOTE]) @@ -662,7 +663,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attrs[TIPC_NLA_BEARER_UDP_OPTS], - tipc_nl_udp_policy)) + tipc_nl_udp_policy, NULL)) goto err; if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 928691c43408..6a7fe7660551 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -996,7 +996,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned int hash; struct unix_address *addr; struct hlist_head *list; - struct path path = { NULL, NULL }; + struct path path = { }; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6a0d48525fcf..c36757e72844 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); + BUG_ON(!atomic_long_read(&u->inflight)); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) @@ -341,6 +342,14 @@ void unix_gc(void) } list_del(&cursor); + /* Now gc_candidates contains only garbage. Restore original + * inflight counters for these as well, and remove the skbuffs + * which are creating the cycle(s). + */ + skb_queue_head_init(&hitlist); + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, inc_inflight, &hitlist); + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -350,14 +359,6 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* Now gc_candidates contains only garbage. Restore original - * inflight counters for these as well, and remove the skbuffs - * which are creating the cycle(s). - */ - skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) - scan_children(&u->sk, inc_inflight, &hitlist); - spin_unlock(&unix_gc_lock); /* Here we are. Hitlist is filled. Die. */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9f770f33c100..6f7f6757ceef 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1102,10 +1102,19 @@ static const struct proto_ops vsock_dgram_ops = { .sendpage = sock_no_sendpage, }; +static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) +{ + if (!transport->cancel_pkt) + return -EOPNOTSUPP; + + return transport->cancel_pkt(vsk); +} + static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; + int cancel = 0; vsk = container_of(work, struct vsock_sock, dwork.work); sk = sk_vsock(vsk); @@ -1116,8 +1125,11 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); + cancel = 1; } release_sock(sk); + if (cancel) + vsock_transport_cancel_pkt(vsk); sock_put(sk); } @@ -1224,11 +1236,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = sock_intr_errno(timeout); sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 9d24c0e958b1..68675a151f22 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -213,6 +213,47 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) return len; } +static int +virtio_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct virtio_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + LIST_HEAD(freeme); + + vsock = virtio_vsock_get(); + if (!vsock) { + return -ENODEV; + } + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) && + new_cnt < virtqueue_get_vring_size(rx_vq)) + queue_work(virtio_vsock_workqueue, &vsock->rx_work); + } + + return 0; +} + static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) { int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; @@ -462,6 +503,7 @@ static struct virtio_transport virtio_transport = { .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, + .cancel_pkt = virtio_transport_cancel_pkt, .dgram_bind = virtio_transport_dgram_bind, .dgram_dequeue = virtio_transport_dgram_dequeue, diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 8d592a45b597..af087b44ceea 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -58,6 +58,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, pkt->len = len; pkt->hdr.len = cpu_to_le32(len); pkt->reply = info->reply; + pkt->vsk = info->vsk; if (info->msg && len > 0) { pkt->buf = kmalloc(len, GFP_KERNEL); @@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, .type = type, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, .type = VIRTIO_VSOCK_TYPE_STREAM, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | (mode & SEND_SHUTDOWN ? VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, .type = VIRTIO_VSOCK_TYPE_STREAM, .msg = msg, .pkt_len = len, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk, .op = VIRTIO_VSOCK_OP_RST, .type = VIRTIO_VSOCK_TYPE_STREAM, .reply = !!pkt, + .vsk = vsk, }; /* Send RST only if the original pkt is not a RST pkt */ @@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk, .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 4be4fbbc0b50..10ae7823a19d 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -96,31 +96,23 @@ static int PROTOCOL_OVERRIDE = -1; static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) { - int err; - switch (vmci_error) { case VMCI_ERROR_NO_MEM: - err = ENOMEM; - break; + return -ENOMEM; case VMCI_ERROR_DUPLICATE_ENTRY: case VMCI_ERROR_ALREADY_EXISTS: - err = EADDRINUSE; - break; + return -EADDRINUSE; case VMCI_ERROR_NO_ACCESS: - err = EPERM; - break; + return -EPERM; case VMCI_ERROR_NO_RESOURCES: - err = ENOBUFS; - break; + return -ENOBUFS; case VMCI_ERROR_INVALID_RESOURCE: - err = EHOSTUNREACH; - break; + return -EHOSTUNREACH; case VMCI_ERROR_INVALID_ARGS: default: - err = EINVAL; + break; } - - return err > 0 ? -err : err; + return -EINVAL; } static u32 vmci_transport_peer_rid(u32 peer_cid) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7f8be4e321a..f280357552b2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -545,22 +545,18 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, { int err; - rtnl_lock(); - if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, genl_family_attrbuf(&nl80211_fam), - nl80211_fam.maxattr, nl80211_policy); + nl80211_fam.maxattr, nl80211_policy, NULL); if (err) - goto out_unlock; + return err; *wdev = __cfg80211_wdev_from_attrs( sock_net(skb->sk), genl_family_attrbuf(&nl80211_fam)); - if (IS_ERR(*wdev)) { - err = PTR_ERR(*wdev); - goto out_unlock; - } + if (IS_ERR(*wdev)) + return PTR_ERR(*wdev); *rdev = wiphy_to_rdev((*wdev)->wiphy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; @@ -570,10 +566,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -584,21 +578,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - if (!*wdev) { - err = -ENODEV; - goto out_unlock; - } + if (!*wdev) + return -ENODEV; } return 0; - out_unlock: - rtnl_unlock(); - return err; -} - -static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) -{ - rtnl_unlock(); } /* IE validation */ @@ -735,7 +719,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) { struct nlattr *tb[NL80211_KEY_MAX + 1]; int err = nla_parse_nested(tb, NL80211_KEY_MAX, key, - nl80211_key_policy); + nl80211_key_policy, NULL); if (err) return err; @@ -776,7 +760,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, tb[NL80211_KEY_DEFAULT_TYPES], - nl80211_key_default_policy); + nl80211_key_default_policy, NULL); if (err) return err; @@ -823,10 +807,11 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; - int err = nla_parse_nested( - kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, - info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], - nl80211_key_default_policy); + int err = nla_parse_nested(kdt, + NUM_NL80211_KEY_DEFAULT_TYPES - 1, + info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], + nl80211_key_default_policy, + info->extack); if (err) return err; @@ -1908,8 +1893,8 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct nl80211_dump_wiphy_state *state) { struct nlattr **tb = genl_family_attrbuf(&nl80211_fam); - int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - tb, nl80211_fam.maxattr, nl80211_policy); + int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, + nl80211_fam.maxattr, nl80211_policy, NULL); /* ignore parse errors for backward compatibility */ if (ret) return 0; @@ -2324,7 +2309,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rem_txq_params) { result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX, nl_txq_params, - txq_params_policy); + txq_params_policy, + info->extack); if (result) return result; result = parse_txq_params(tb, &txq_params); @@ -2608,17 +2594,17 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * int filter_wiphy = -1; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; + int ret; rtnl_lock(); if (!cb->args[2]) { struct nl80211_dump_wiphy_state state = { .filter_wiphy = -1, }; - int ret; ret = nl80211_dump_wiphy_parse(skb, cb, &state); if (ret) - return ret; + goto out_unlock; filter_wiphy = state.filter_wiphy; @@ -2663,12 +2649,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * wp_idx++; } out: - rtnl_unlock(); - cb->args[0] = wp_idx; cb->args[1] = if_idx; - return skb->len; + ret = skb->len; + out_unlock: + rtnl_unlock(); + + return ret; } static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) @@ -2709,8 +2697,8 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) if (!nla) return -EINVAL; - if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, - nla, mntr_flags_policy)) + if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, nla, + mntr_flags_policy, NULL)) return -EINVAL; for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++) @@ -3575,7 +3563,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, if (sband == NULL) return -EINVAL; err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates, - nl80211_txattr_policy); + nl80211_txattr_policy, info->extack); if (err) return err; if (tb[NL80211_TXRATE_LEGACY]) { @@ -4114,8 +4102,8 @@ static int parse_station_flags(struct genl_info *info, if (!nla) return 0; - if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, - nla, sta_flags_policy)) + if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, nla, + sta_flags_policy, info->extack)) return -EINVAL; /* @@ -4452,9 +4440,10 @@ static int nl80211_dump_station(struct sk_buff *skb, int sta_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!wdev->netdev) { err = -EINVAL; @@ -4489,7 +4478,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -4741,7 +4730,7 @@ static int nl80211_parse_sta_wme(struct genl_info *info, nla = info->attrs[NL80211_ATTR_STA_WME]; err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, - nl80211_sta_wme_policy); + nl80211_sta_wme_policy, info->extack); if (err) return err; @@ -5275,9 +5264,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; @@ -5310,7 +5300,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5470,9 +5460,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpp) { err = -EOPNOTSUPP; @@ -5505,7 +5496,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5864,7 +5855,7 @@ do { \ return -EINVAL; if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_CONFIG], - nl80211_meshconf_params_policy)) + nl80211_meshconf_params_policy, info->extack)) return -EINVAL; /* This makes sure that there aren't more than 32 mesh config @@ -6013,7 +6004,7 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, return -EINVAL; if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_SETUP], - nl80211_mesh_setup_params_policy)) + nl80211_mesh_setup_params_policy, info->extack)) return -EINVAL; if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC]) @@ -6404,7 +6395,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX, - nl_reg_rule, reg_rule_policy); + nl_reg_rule, reg_rule_policy, + info->extack); if (r) goto bad_reg; r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); @@ -6472,7 +6464,7 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, return -EINVAL; err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest, - nl80211_bss_select_policy); + nl80211_bss_select_policy, NULL); if (err) return err; @@ -6873,7 +6865,7 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, return -EINVAL; err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX, - attr, nl80211_plan_policy); + attr, nl80211_plan_policy, NULL); if (err) return err; @@ -6964,7 +6956,8 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, err = nla_parse_nested(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - attr, nl80211_match_policy); + attr, nl80211_match_policy, + NULL); if (err) return ERR_PTR(err); /* add other standalone attributes here */ @@ -7143,7 +7136,8 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, err = nla_parse_nested(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - attr, nl80211_match_policy); + attr, nl80211_match_policy, + NULL); if (err) goto out_free; ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; @@ -7444,7 +7438,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_CSA_IES], - nl80211_policy); + nl80211_policy, info->extack); if (err) return err; @@ -7674,9 +7668,12 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) int start = cb->args[2], idx = 0; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); - if (err) + if (err) { + rtnl_unlock(); return err; + } wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); @@ -7699,7 +7696,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) wdev_unlock(wdev); cb->args[2] = idx; - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return skb->len; } @@ -7784,9 +7781,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) int res; bool radio_stats; + rtnl_lock(); res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) - return res; + goto out_err; /* prepare_wdev_dump parsed the attributes */ radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; @@ -7827,7 +7825,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return res; } @@ -8646,7 +8644,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - attrbuf, nl80211_fam.maxattr, nl80211_policy); + attrbuf, nl80211_fam.maxattr, + nl80211_policy, NULL); if (err) goto out_err; @@ -9537,7 +9536,7 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) return -EINVAL; err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm, - nl80211_attr_cqm_policy); + nl80211_attr_cqm_policy, info->extack); if (err) return err; @@ -9947,7 +9946,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, return -EINVAL; err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr, - nl80211_wowlan_tcp_policy); + nl80211_wowlan_tcp_policy, NULL); if (err) return err; @@ -10092,7 +10091,8 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, goto out; } - err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy); + err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy, + NULL); if (err) goto out; @@ -10129,7 +10129,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG, info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], - nl80211_wowlan_policy); + nl80211_wowlan_policy, info->extack); if (err) return err; @@ -10212,7 +10212,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) u8 *mask_pat; nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - NULL); + NULL, info->extack); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10423,7 +10423,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule, - nl80211_coalesce_policy); + nl80211_coalesce_policy, NULL); if (err) return err; @@ -10461,7 +10461,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; @@ -10582,7 +10582,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA, info->attrs[NL80211_ATTR_REKEY_DATA], - nl80211_rekey_policy); + nl80211_rekey_policy, info->extack); if (err) return err; @@ -10899,7 +10899,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb, err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX, info->attrs[NL80211_ATTR_NAN_FUNC], - nl80211_nan_func_policy); + nl80211_nan_func_policy, info->extack); if (err) return err; @@ -10996,7 +10996,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb, err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX, tb[NL80211_NAN_FUNC_SRF], - nl80211_nan_srf_policy); + nl80211_nan_srf_policy, info->extack); if (err) goto out; @@ -11508,17 +11508,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, void *data = NULL; unsigned int data_len = 0; - rtnl_lock(); - if (cb->args[0]) { /* subtract the 1 again here */ struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -11535,26 +11531,22 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, return 0; } - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - attrbuf, nl80211_fam.maxattr, nl80211_policy); + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, + nl80211_fam.maxattr, nl80211_policy, NULL); if (err) - goto out_unlock; + return err; if (!attrbuf[NL80211_ATTR_VENDOR_ID] || - !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { - err = -EINVAL; - goto out_unlock; - } + !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) + return -EINVAL; *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); - if (IS_ERR(*rdev)) { - err = PTR_ERR(*rdev); - goto out_unlock; - } + if (IS_ERR(*rdev)) + return PTR_ERR(*rdev); vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); @@ -11567,19 +11559,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; - if (!vcmd->dumpit) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (!vcmd->dumpit) + return -EOPNOTSUPP; vcmd_idx = i; break; } - if (vcmd_idx < 0) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (vcmd_idx < 0) + return -EOPNOTSUPP; if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); @@ -11596,9 +11584,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, /* keep rtnl locked in successful case */ return 0; - out_unlock: - rtnl_unlock(); - return err; } static int nl80211_vendor_cmd_dump(struct sk_buff *skb, @@ -11613,9 +11598,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, int err; struct nlattr *vendor_data; + rtnl_lock(); err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out; vcmd_idx = cb->args[2]; data = (void *)cb->args[3]; @@ -11624,15 +11610,21 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_NETDEV)) { - if (!wdev) - return -EINVAL; + if (!wdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && - !wdev->netdev) - return -EINVAL; + !wdev->netdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (!wdev_running(wdev)) - return -ENETDOWN; + if (!wdev_running(wdev)) { + err = -ENETDOWN; + goto out; + } } } diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 16b6b5988be9..570a2b67ca10 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev) /* Age scan results with time spent in suspend */ cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - if (rdev->ops->resume) { - rtnl_lock(); - if (rdev->wiphy.registered) - ret = rdev_resume(rdev); - rtnl_unlock(); - } + rtnl_lock(); + if (rdev->wiphy.registered && rdev->ops->resume) + ret = rdev_resume(rdev); + rtnl_unlock(); return ret; } diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 666c5ffe929d..eaea9c4fb3b0 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -54,8 +54,8 @@ static inline unsigned int __xfrm4_dpref_spref_hash(const xfrm_address_t *daddr, static inline unsigned int __xfrm6_pref_hash(const xfrm_address_t *addr, __u8 prefixlen) { - int pdw; - int pbi; + unsigned int pdw; + unsigned int pbi; u32 initval = 0; pdw = prefixlen >> 5; /* num of whole u32 in prefix */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 9705c279494b..5f691fd53a6c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -412,7 +412,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) + return -EINVAL; + + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) return -EINVAL; return 0; @@ -925,8 +932,8 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) u8 proto = 0; int err; - err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, - xfrma_policy); + err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy, + NULL); if (err < 0) return err; @@ -2441,7 +2448,8 @@ static const struct xfrm_link { [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, }; -static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; @@ -2481,7 +2489,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, link->nla_max ? : XFRMA_MAX, - link->nla_pol ? : xfrma_policy); + link->nla_pol ? : xfrma_policy, extack); if (err < 0) return err; @@ -3101,7 +3109,6 @@ static bool xfrm_is_alive(const struct km_event *c) } static struct xfrm_mgr netlink_mgr = { - .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, |