Diffstat (limited to 'net')
207 files changed, 5492 insertions, 2505 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index abc5f400fc71..c1742322f7d2 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -797,12 +797,6 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, - .ndo_fdb_add = switchdev_port_fdb_add, - .ndo_fdb_del = switchdev_port_fdb_del, - .ndo_fdb_dump = switchdev_port_fdb_dump, - .ndo_bridge_setlink = switchdev_port_bridge_setlink, - .ndo_bridge_getlink = switchdev_port_bridge_getlink, - .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, .ndo_get_iflink = vlan_dev_get_iflink, }; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 495ba7cdcb04..fa8d6b475c06 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1022,7 +1022,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, u8 tq_avg; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "update_originator(): Searching and updating originator entry of received packet\n"); + "%s(): Searching and updating originator entry of received packet\n", + __func__); rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, @@ -1944,7 +1945,7 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, batadv_iv_ogm_orig_print_neigh(orig_node, if_outgoing, seq); - seq_puts(seq, "\n"); + seq_putc(seq, '\n'); batman_count++; next: diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index a36c8e7291d6..4e2724c5b33d 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -400,7 +400,7 @@ static void batadv_v_orig_print(struct batadv_priv *bat_priv, neigh_node->if_incoming->net_dev->name); batadv_v_orig_print_neigh(orig_node, if_outgoing, seq); - seq_puts(seq, "\n"); + seq_putc(seq, '\n'); batman_count++; next: diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index b90c9903e246..b58007b79e3a 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -19,6 +19,7 @@ #include "main.h" #include <linux/atomic.h> +#include <linux/bitops.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/etherdevice.h> @@ -29,6 +30,7 @@ #include <linux/kernel.h> #include <linux/kref.h> #include <linux/netdevice.h> +#include <linux/nl80211.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> @@ -109,8 +111,12 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) */ return 0; } - if (!ret) - return sinfo.expected_throughput / 100; + if (ret) + goto default_throughput; + if (!(sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT))) + goto default_throughput; + + return sinfo.expected_throughput / 100; } /* if not a wifi interface, check if this device provides data via diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index d07e89ec8467..cdd8e8e4df0b 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -394,7 +394,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, */ ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): CLAIM %pM on vid %d\n", mac, + "%s(): CLAIM %pM on vid %d\n", __func__, mac, batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: @@ -403,7 +403,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, */ ether_addr_copy(hw_src, mac); 
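/* Note: the batadv hunks around this point all make the same change --
 * hardcoded function names in debug strings are replaced by "%s" plus the
 * C99 __func__ identifier, so the messages can never go stale after a
 * rename. A minimal standalone sketch of the pattern; pr_dbg() here is a
 * stand-in for batadv_dbg(), not a kernel API, and ##__VA_ARGS__ is the
 * GNU/kernel variadic-macro idiom:
 */
#include <stdio.h>

#define pr_dbg(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)

static void send_claim(const char *mac, int vid)
{
	/* __func__ expands to "send_claim" at compile time */
	pr_dbg("%s(): CLAIM %s on vid %d\n", __func__, mac, vid);
}

int main(void)
{
	send_claim("00:11:22:33:44:55", 42);
	return 0;
}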
batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, + "%s(): UNCLAIM %pM on vid %d\n", __func__, mac, batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: @@ -412,7 +412,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, */ ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", + "%s(): ANNOUNCE of %pM on vid %d\n", __func__, ethhdr->h_source, batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: @@ -423,15 +423,15 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(hw_src, mac); ether_addr_copy(ethhdr->h_dest, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", + "%s(): REQUEST of %pM to %pM on vid %d\n", __func__, ethhdr->h_source, ethhdr->h_dest, batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_LOOPDETECT: ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n", - ethhdr->h_source, ethhdr->h_dest, + "%s(): LOOPDETECT of %pM to %pM on vid %d\n", + __func__, ethhdr->h_source, ethhdr->h_dest, batadv_print_vid(vid)); break; @@ -509,7 +509,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, return entry; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", + "%s(): not found (%pM, %d), creating new entry\n", __func__, orig, batadv_print_vid(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); @@ -605,7 +605,8 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, int i; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_answer_request(): received a claim request, send all of our own claims again\n"); + "%s(): received a claim request, send all of our own claims again\n", + __func__); backbone_gw = batadv_backbone_hash_find(bat_priv, primary_if->net_dev->dev_addr, @@ -718,8 +719,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, kref_init(&claim->refcount); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, batadv_print_vid(vid)); + "%s(): adding new entry %pM, vid %d to hash ...\n", + __func__, mac, batadv_print_vid(vid)); kref_get(&claim->refcount); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, @@ -739,8 +740,9 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, goto claim_free_ref; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n", - mac, batadv_print_vid(vid), backbone_gw->orig); + "%s(): changing ownership for %pM, vid %d to gw %pM\n", + __func__, mac, batadv_print_vid(vid), + backbone_gw->orig); remove_crc = true; } @@ -808,7 +810,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, if (!claim) return; - batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", + batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__, mac, batadv_print_vid(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, @@ -848,8 +850,8 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, crc = ntohs(*((__be16 *)(&an_addr[4]))); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - batadv_print_vid(vid), backbone_gw->orig, crc); + "%s(): ANNOUNCE vid %d (sent by %pM)... 
CRC = %#.4x\n", + __func__, batadv_print_vid(vid), backbone_gw->orig, crc); spin_lock_bh(&backbone_gw->crc_lock); backbone_crc = backbone_gw->crc; @@ -857,8 +859,8 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, if (backbone_crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, - "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", - backbone_gw->orig, + "%s(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", + __func__, backbone_gw->orig, batadv_print_vid(backbone_gw->vid), backbone_crc, crc); @@ -903,8 +905,8 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, return true; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "handle_request(): REQUEST vid %d (sent by %pM)...\n", - batadv_print_vid(vid), ethhdr->h_source); + "%s(): REQUEST vid %d (sent by %pM)...\n", + __func__, batadv_print_vid(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return true; @@ -940,7 +942,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, - "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", + "%s(): UNCLAIM %pM on vid %d (sent by %pM)...\n", __func__, claim_addr, batadv_print_vid(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); @@ -1160,9 +1162,9 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, ethhdr); if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, batadv_print_vid(vid), hw_src, - hw_dst); + "%s(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", + __func__, ethhdr->h_source, batadv_print_vid(vid), + hw_src, hw_dst); if (ret < 2) return !!ret; @@ -1196,8 +1198,9 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, } batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst); + "%s(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", + __func__, ethhdr->h_source, batadv_print_vid(vid), hw_src, + hw_dst); return true; } @@ -1237,8 +1240,8 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) continue; batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, - "bla_purge_backbone_gw(): backbone gw %pM timed out\n", - backbone_gw->orig); + "%s(): backbone gw %pM timed out\n", + __func__, backbone_gw->orig); purge_now: /* don't wait for the pending request anymore */ @@ -1295,11 +1298,11 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, goto skip; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_purge_claims(): timed out.\n"); + "%s(): timed out.\n", __func__); purge_now: batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_purge_claims(): %pM, vid %d\n", + "%s(): %pM, vid %d\n", __func__, claim->addr, claim->vid); batadv_handle_unclaim(bat_priv, primary_if, @@ -1851,8 +1854,8 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, */ batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n", - ethhdr->h_source, + "%s(): Unclaimed MAC %pM found. Claim it. Local: %s\n", + __func__, ethhdr->h_source, batadv_is_my_client(bat_priv, ethhdr->h_source, vid) ? 
"yes" : "no"); @@ -1978,15 +1981,15 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, * older than 100 ms to make sure we really * have a roaming client here. */ - batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n", - ethhdr->h_source); + batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): Roaming client %pM detected. Unclaim it.\n", + __func__, ethhdr->h_source); batadv_handle_unclaim(bat_priv, primary_if, primary_if->net_dev->dev_addr, ethhdr->h_source, vid); goto allow; } else { - batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n", - ethhdr->h_source); + batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): Race for claim %pM detected. Drop packet.\n", + __func__, ethhdr->h_source); goto handled; } } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 000ca2f113ab..6930d6b50f99 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -601,7 +601,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, BATADV_DAT_ADDR_MAX); batadv_dbg(BATADV_DBG_DAT, bat_priv, - "dat_select_candidates(): IP=%pI4 hash(IP)=%u\n", &ip_dst, + "%s(): IP=%pI4 hash(IP)=%u\n", __func__, &ip_dst, ip_key); for (select = 0; select < BATADV_DAT_CANDIDATES_NUM; select++) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 810f7d026f54..2be8f1f46529 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.1" +#define BATADV_SOURCE_VERSION "2017.2" #endif /* B.A.T.M.A.N. parameters */ @@ -168,7 +168,7 @@ enum batadv_uev_type { /* Maximum number of fragments for one packet */ #define BATADV_FRAG_MAX_FRAGMENTS 16 /* Maxumim size of each fragment */ -#define BATADV_FRAG_MAX_FRAG_SIZE 1400 +#define BATADV_FRAG_MAX_FRAG_SIZE 1280 /* Time to keep fragments while waiting for rest of the fragments */ #define BATADV_FRAG_TIMEOUT 10000 diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index e1f6fc72fe3e..3604d7899e2c 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1935,9 +1935,7 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) list) seq_printf(seq, "%pM ", nc_node->addr); - seq_puts(seq, "\n"); - - seq_puts(seq, " Outgoing: "); + seq_puts(seq, "\n Outgoing: "); /* For out_nc_node to this orig_node */ list_for_each_entry_rcu(nc_node, &orig_node->out_coding_list, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index ae9f4d37d34f..f10e3ff26f9d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -985,8 +985,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, batadv_orig_node_put(orig_node_gw); if (is_gw) { batadv_dbg(BATADV_DBG_BLA, bat_priv, - "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n", - orig_addr_gw); + "%s(): Dropped unicast pkt received from another backbone gw %pM.\n", + __func__, orig_addr_gw); goto free_skb; } } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 403df596a73d..d239a9d72ac3 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -971,11 +971,11 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (hard_iface) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "purge_outstanding_packets(): %s\n", - hard_iface->net_dev->name); + "%s(): %s\n", + __func__, 
hard_iface->net_dev->name); else batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "purge_outstanding_packets()\n"); + "%s()\n", __func__); /* claim bcast list for free() */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 556f9a865ddf..e3e2585d0977 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -27,6 +27,7 @@ #include <linux/etherdevice.h> #include <linux/fs.h> #include <linux/if_ether.h> +#include <linux/init.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/kref.h> @@ -1497,7 +1498,7 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) /** * batadv_tp_meter_init - initialize global tp_meter structures */ -void batadv_tp_meter_init(void) +void __init batadv_tp_meter_init(void) { get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom)); } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e75b4937b497..e1133bc634b5 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2488,18 +2488,16 @@ static bool _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, struct batadv_tt_global_entry *tt_global_entry) { - bool ret = false; - if (tt_local_entry->common.flags & BATADV_TT_CLIENT_WIFI && tt_global_entry->common.flags & BATADV_TT_CLIENT_WIFI) - ret = true; + return true; /* check if the two clients are marked as isolated */ if (tt_local_entry->common.flags & BATADV_TT_CLIENT_ISOLA && tt_global_entry->common.flags & BATADV_TT_CLIENT_ISOLA) - ret = true; + return true; - return ret; + return false; } /** @@ -4010,19 +4008,22 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, const unsigned char *addr, unsigned short vid) { - bool ret = false; + /* ignore loop detect macs, they are not supposed to be in the tt local + * data as well. 
+ */ + if (batadv_bla_is_loopdetect_mac(addr)) + return false; if (!batadv_tt_global_add(bat_priv, orig_node, addr, vid, BATADV_TT_CLIENT_TEMP, atomic_read(&orig_node->last_ttvn))) - goto out; + return false; batadv_dbg(BATADV_DBG_TT, bat_priv, "Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n", addr, batadv_print_vid(vid), orig_node->orig); - ret = true; -out: - return ret; + + return true; } /** diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 42d0997e2fbb..8a8f77a247e6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -733,7 +733,7 @@ void bt_procfs_cleanup(struct net *net, const char *name) EXPORT_SYMBOL(bt_procfs_init); EXPORT_SYMBOL(bt_procfs_cleanup); -static struct net_proto_family bt_sock_family_ops = { +static const struct net_proto_family bt_sock_family_ops = { .owner = THIS_MODULE, .family = PF_BLUETOOTH, .create = bt_sock_create, diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c index 24d4e60f8c48..c7b1a9aee579 100644 --- a/net/bluetooth/ecdh_helper.c +++ b/net/bluetooth/ecdh_helper.c @@ -89,11 +89,9 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32], p.curve_id = ECC_CURVE_NIST_P256; buf_len = crypto_ecdh_key_len(&p); buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) { - pr_err("alg: kpp: Failed to allocate %d bytes for buf\n", - buf_len); + if (!buf) goto free_req; - } + crypto_ecdh_encode_key(buf, buf_len, &p); /* Set A private Key */ @@ -170,11 +168,8 @@ bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]) p.key_size = 32; buf_len = crypto_ecdh_key_len(&p); buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) { - pr_err("alg: kpp: Failed to allocate %d bytes for buf\n", - buf_len); + if (!buf) goto free_req; - } do { if (tries++ >= max_tries) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 05686776a5fb..93806b959039 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -148,13 +148,13 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, return -EINVAL; /* When the diagnostic flags are not persistent and the transport - * is not active, then there is no need for the vendor callback. - * - * Instead just store the desired value. If needed the setting - * will be programmed when the controller gets powered on. + * is not active or in user channel operation, then there is no need + * for the vendor callback. Instead just store the desired value and + * the setting will be programmed when the controller gets powered on. */ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && - !test_bit(HCI_RUNNING, &hdev->flags)) + (!test_bit(HCI_RUNNING, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_USER_CHANNEL))) goto done; hci_req_sync_lock(hdev); @@ -548,6 +548,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + bool changed = false; /* If Connectionless Slave Broadcast master role is supported * enable all necessary events for it. 
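/* Note: the hci_set_event_mask_page_2() changes around this point thread a
 * `changed` flag through the event-mask assembly and only queue the HCI
 * command when some bit actually deviates from the all-zero default --
 * per the quoted comment, some Broadcom controllers advertise the command
 * without supporting it. A freestanding sketch of the skip-if-unchanged
 * pattern; send_cmd() and the two feature flags are illustrative
 * stand-ins, not kernel APIs:
 */
#include <stdbool.h>
#include <stdio.h>

static void send_cmd(const unsigned char *events, size_t len)
{
	printf("sending %zu-byte event mask, events[2]=%#x\n",
	       len, events[2]);
}

static void set_event_mask_page_2(bool csb_master, bool ping_capable)
{
	unsigned char events[8] = { 0 };
	bool changed = false;

	if (csb_master) {
		events[1] |= 0x80;	/* Synchronization Train Complete */
		changed = true;
	}
	if (ping_capable) {
		events[2] |= 0x80;	/* Auth Payload Timeout Expired */
		changed = true;
	}

	/* default mask is all zeroes: skip the command if nothing is set */
	if (changed)
		send_cmd(events, sizeof(events));
}

int main(void)
{
	set_event_mask_page_2(false, true);
	return 0;
}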
@@ -557,6 +558,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) events[1] |= 0x80; /* Synchronization Train Complete */ events[2] |= 0x10; /* Slave Page Response Timeout */ events[2] |= 0x20; /* CSB Channel Map Change */ + changed = true; } /* If Connectionless Slave Broadcast slave role is supported @@ -567,13 +569,24 @@ static void hci_set_event_mask_page_2(struct hci_request *req) events[2] |= 0x02; /* CSB Receive */ events[2] |= 0x04; /* CSB Timeout */ events[2] |= 0x08; /* Truncated Page Complete */ + changed = true; } /* Enable Authenticated Payload Timeout Expired event if supported */ - if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) + if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) { events[2] |= 0x80; + changed = true; + } - hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events); + /* Some Broadcom based controllers indicate support for Set Event + * Mask Page 2 command, but then actually do not support it. Since + * the default value is all bits set to zero, the command is only + * required if the event mask has to be changed. In case no change + * to the event mask is needed, skip this command. + */ + if (changed) + hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, + sizeof(events), events); } static int hci_init3_req(struct hci_request *req, unsigned long opt) @@ -635,6 +648,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) * Report */ + /* If the controller supports Channel Selection Algorithm #2 + * feature, enable the corresponding event. + */ + if (hdev->le_features[1] & HCI_LE_CHAN_SEL_ALG2) + events[2] |= 0x08; /* LE Channel Selection + * Algorithm + */ + /* If the controller supports the LE Set Scan Enable command, * enable the corresponding advertising report event. */ @@ -677,6 +698,12 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[34] & 0x04) events[1] |= 0x01; /* LE Generate DHKey Complete */ + /* If the controller supports the LE Set Default PHY or + * LE Set PHY commands, enable the corresponding event. + */ + if (hdev->commands[35] & (0x20 | 0x40)) + events[1] |= 0x08; /* LE PHY Update Complete */ + hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events), events); @@ -771,6 +798,27 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) sizeof(support), &support); } + /* Set Suggested Default Data Length to maximum if supported */ + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { + struct hci_cp_le_write_def_data_len cp; + + cp.tx_len = hdev->le_max_tx_len; + cp.tx_time = hdev->le_max_tx_time; + hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp); + } + + /* Set Default PHY parameters if command is supported */ + if (hdev->commands[35] & 0x20) { + struct hci_cp_le_set_default_phy cp; + + /* No transmitter PHY or receiver PHY preferences */ + cp.all_phys = 0x03; + cp.tx_phys = 0; + cp.rx_phys = 0; + + hci_req_add(req, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp), &cp); + } + return 0; } @@ -1384,6 +1432,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) * completed. 
*/ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) ret = hdev->set_diag(hdev, true); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 14585edc9439..a0ef89772c36 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -23,6 +23,7 @@ #include <linux/debugfs.h> #include <linux/scatterlist.h> #include <linux/crypto.h> +#include <crypto/algapi.h> #include <crypto/b128ops.h> #include <crypto/hash.h> @@ -523,7 +524,7 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], if (err) return false; - return !memcmp(bdaddr->b, hash, 3); + return !crypto_memneq(bdaddr->b, hash, 3); } int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) @@ -579,7 +580,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (memcmp(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_sk, debug_sk, 32)) break; } smp->debug_key = false; @@ -993,7 +994,7 @@ static u8 smp_random(struct smp_chan *smp) if (ret) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) { + if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) { BT_ERR("Pairing failed (confirmation values mismatch)"); return SMP_CONFIRM_FAILED; } @@ -1512,7 +1513,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) smp->rrnd, r, cfm)) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, cfm, 16)) + if (crypto_memneq(smp->pcnf, cfm, 16)) return SMP_CONFIRM_FAILED; smp->passkey_round++; @@ -1908,7 +1909,7 @@ static u8 sc_send_public_key(struct smp_chan *smp) /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. 
*/ - if (memcmp(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_sk, debug_sk, 32)) break; } } @@ -2176,7 +2177,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, cfm, 16)) + if (crypto_memneq(smp->pcnf, cfm, 16)) return SMP_CONFIRM_FAILED; } else { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), @@ -2660,7 +2661,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(cfm.confirm_val, smp->pcnf, 16)) + if (crypto_memneq(cfm.confirm_val, smp->pcnf, 16)) return SMP_CONFIRM_FAILED; } @@ -2693,7 +2694,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) else hcon->pending_sec_level = BT_SECURITY_FIPS; - if (!memcmp(debug_pk, smp->remote_pk, 64)) + if (!crypto_memneq(debug_pk, smp->remote_pk, 64)) set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); if (smp->method == DSP_PASSKEY) { @@ -2792,7 +2793,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(check->e, e, 16)) + if (crypto_memneq(check->e, e, 16)) return SMP_DHKEY_CHECK_FAILED; if (!hcon->out) { @@ -3506,10 +3507,10 @@ static int __init test_debug_key(void) if (!generate_ecdh_keys(pk, sk)) return -EINVAL; - if (memcmp(sk, debug_sk, 32)) + if (crypto_memneq(sk, debug_sk, 32)) return -EINVAL; - if (memcmp(pk, debug_pk, 64)) + if (crypto_memneq(pk, debug_pk, 64)) return -EINVAL; return 0; @@ -3529,7 +3530,7 @@ static int __init test_ah(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 3)) + if (crypto_memneq(res, exp, 3)) return -EINVAL; return 0; @@ -3559,7 +3560,7 @@ static int __init test_c1(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3584,7 +3585,7 @@ static int __init test_s1(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3616,7 +3617,7 @@ static int __init test_f4(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3650,10 +3651,10 @@ static int __init test_f5(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(mackey, exp_mackey, 16)) + if (crypto_memneq(mackey, exp_mackey, 16)) return -EINVAL; - if (memcmp(ltk, exp_ltk, 16)) + if (crypto_memneq(ltk, exp_ltk, 16)) return -EINVAL; return 0; @@ -3686,7 +3687,7 @@ static int __init test_f6(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3740,7 +3741,7 @@ static int __init test_h6(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; diff --git a/net/bridge/br.c b/net/bridge/br.c index 889e5640455f..1407d1ba7577 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,7 +121,7 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; -/* called with RTNL */ +/* called with RTNL or RCU */ static int br_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -131,27 +131,36 @@ static int br_switchdev_event(struct notifier_block *unused, struct switchdev_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; - p = br_port_get_rtnl(dev); + p = 
br_port_get_rtnl_rcu(dev); if (!p) goto out; br = p->br; switch (event) { - case SWITCHDEV_FDB_ADD: + case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, fdb_info->vid); - if (err) + if (err) { err = notifier_from_errno(err); + break; + } + br_fdb_offloaded_set(br, p, fdb_info->addr, + fdb_info->vid); break; - case SWITCHDEV_FDB_DEL: + case SWITCHDEV_FDB_DEL_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_del(br, p, fdb_info->addr, fdb_info->vid); if (err) err = notifier_from_errno(err); break; + case SWITCHDEV_FDB_OFFLOADED: + fdb_info = ptr; + br_fdb_offloaded_set(br, p, fdb_info->addr, + fdb_info->vid); + break; } out: diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ab0c7cc8448f..fef7872a320b 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -511,6 +511,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb->is_static = is_static; fdb->added_by_user = 0; fdb->added_by_external_learn = 0; + fdb->offloaded = 0; fdb->updated = fdb->used = jiffies; hlist_add_head_rcu(&fdb->hlist, head); } @@ -647,11 +648,16 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ndm->ndm_family = AF_BRIDGE; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0; + ndm->ndm_flags = 0; ndm->ndm_type = 0; ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex; ndm->ndm_state = fdb_to_nud(br, fdb); + if (fdb->offloaded) + ndm->ndm_flags |= NTF_OFFLOADED; + if (fdb->added_by_external_learn) + ndm->ndm_flags |= NTF_EXT_LEARNED; + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) goto nla_put_failure; if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) @@ -690,6 +696,8 @@ static void fdb_notify(struct net_bridge *br, struct sk_buff *skb; int err = -ENOBUFS; + br_switchdev_fdb_notify(fdb, type); + skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; @@ -1075,7 +1083,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, struct net_bridge_fdb_entry *fdb; int err = 0; - ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1110,7 +1117,6 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, struct net_bridge_fdb_entry *fdb; int err = 0; - ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); fdb = br_fdb_find(br, addr, vid); @@ -1123,3 +1129,17 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, return err; } + +void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + struct net_bridge_fdb_entry *fdb; + + spin_lock_bh(&br->hash_lock); + + fdb = br_fdb_find(br, addr, vid); + if (fdb) + fdb->offloaded = 1; + + spin_unlock_bh(&br->hash_lock); +} diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 7f8d05cf9065..f3aef22931ab 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -138,7 +138,7 @@ void br_manage_promisc(struct net_bridge *br) /* If vlan filtering is disabled or bridge interface is placed * into promiscuous mode, place all ports in promiscuous mode. 
*/ - if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br)) + if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev)) set_all = true; list_for_each_entry(p, &br->port_list, list) { diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index b0845480a3ae..09dcdb9c0f3c 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -599,7 +599,7 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; vg = nbp_vlan_group(p); - if (br_vlan_enabled(br) && vg && entry->vid == 0) { + if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; err = __br_mdb_add(net, br, entry); @@ -694,7 +694,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; vg = nbp_vlan_group(p); - if (br_vlan_enabled(br) && vg && entry->vid == 0) { + if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; err = __br_mdb_del(br, entry); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index faa7261a992f..8dc5c8d69bcd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2176,6 +2176,14 @@ unlock: return err; } +bool br_multicast_enabled(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return !br->multicast_disabled; +} +EXPORT_SYMBOL_GPL(br_multicast_enabled); + int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { unsigned long max_delay; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 32bd3ead9ba1..63dca347b73b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -662,16 +662,26 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) } /* Set/clear or port flags based on attribute */ -static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], - int attrtype, unsigned long mask) +static int br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], + int attrtype, unsigned long mask) { - if (tb[attrtype]) { - u8 flag = nla_get_u8(tb[attrtype]); - if (flag) - p->flags |= mask; - else - p->flags &= ~mask; - } + unsigned long flags; + int err; + + if (!tb[attrtype]) + return 0; + + if (nla_get_u8(tb[attrtype])) + flags = p->flags | mask; + else + flags = p->flags & ~mask; + + err = br_switchdev_set_port_flag(p, flags, mask); + if (err) + return err; + + p->flags = flags; + return 0; } /* Process bridge protocol info on port */ @@ -681,20 +691,55 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) bool br_vlan_tunnel_old = false; int err; - br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); - br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); - br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); - br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); - br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); - br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); - br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); - br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); - br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD); - br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); - br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); + err = br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); + if (err) + 
return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); + if (err) + return err; br_vlan_tunnel_old = (p->flags & BR_VLAN_TUNNEL) ? true : false; - br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); + err = br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); + if (err) + return err; + if (br_vlan_tunnel_old && !(p->flags & BR_VLAN_TUNNEL)) nbp_vlan_tunnel_info_flush(p); @@ -1251,7 +1296,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) u32 ageing_time = jiffies_to_clock_t(br->ageing_time); u32 stp_enabled = br->stp_enabled; u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; - u8 vlan_enabled = br_vlan_enabled(br); + u8 vlan_enabled = br_vlan_enabled(br->dev); u64 clockval; clockval = br_timer_value(&br->hello_timer); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0d177280aa84..c18682f804a0 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -169,7 +169,8 @@ struct net_bridge_fdb_entry { unsigned char is_local:1, is_static:1, added_by_user:1, - added_by_external_learn:1; + added_by_external_learn:1, + offloaded:1; /* write-heavy members should not affect lookups */ unsigned long updated ____cacheline_aligned_in_smp; @@ -284,6 +285,12 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * rtnl_dereference(dev->rx_handler_data) : NULL; } +static inline struct net_bridge_port *br_port_get_rtnl_rcu(const struct net_device *dev) +{ + return br_port_exists(dev) ? 
+ rcu_dereference_rtnl(dev->rx_handler_data) : NULL; +} + struct net_bridge { spinlock_t lock; spinlock_t hash_lock; @@ -530,6 +537,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); +void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); /* br_forward.c */ enum br_pkt_type { @@ -854,10 +863,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) return vg->pvid; } -static inline int br_vlan_enabled(struct net_bridge *br) -{ - return br->vlan_enabled; -} #else static inline bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, @@ -945,11 +950,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) return 0; } -static inline int br_vlan_enabled(struct net_bridge *br) -{ - return 0; -} - static inline int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { @@ -1085,6 +1085,11 @@ void nbp_switchdev_frame_mark(const struct net_bridge_port *p, struct sk_buff *skb); bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, const struct sk_buff *skb); +int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask); +void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, + int type); #else static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) { @@ -1101,6 +1106,18 @@ static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, { return true; } + +static inline int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask) +{ + return 0; +} + +static inline void +br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) +{ +} #endif /* CONFIG_NET_SWITCHDEV */ #endif diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 6f12a5271219..89110319ef0f 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -150,7 +150,6 @@ static int br_stp_call_user(struct net_bridge *br, char *arg) static void br_stp_start(struct net_bridge *br) { - struct net_bridge_port *p; int err = -ENOENT; if (net_eq(dev_net(br->dev), &init_net)) @@ -169,11 +168,6 @@ static void br_stp_start(struct net_bridge *br) if (!err) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); - - /* Stop hello and hold timers */ - del_timer(&br->hello_timer); - list_for_each_entry(p, &br->port_list, list) - del_timer(&p->hold_timer); } else { br->stp_enabled = BR_KERNEL_STP; br_debug(br, "using kernel STP\n"); @@ -189,7 +183,6 @@ static void br_stp_start(struct net_bridge *br) static void br_stp_stop(struct net_bridge *br) { - struct net_bridge_port *p; int err; if (br->stp_enabled == BR_USER_STP) { @@ -198,10 +191,6 @@ static void br_stp_stop(struct net_bridge *br) br_err(br, "failed to stop userspace STP (%d)\n", err); /* To start timers on any ports left in blocking */ - mod_timer(&br->hello_timer, jiffies + br->hello_time); - list_for_each_entry(p, &br->port_list, list) - mod_timer(&p->hold_timer, - round_jiffies(jiffies + BR_HOLD_TIME)); spin_lock_bh(&br->lock); br_port_state_selection(br); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index f4097b900de1..181a44d0f1da 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -55,3 +55,79 @@ bool 
nbp_switchdev_allowed_egress(const struct net_bridge_port *p, return !skb->offload_fwd_mark || BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; } + +/* Flags that can be offloaded to hardware */ +#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \ + BR_MCAST_FLOOD | BR_BCAST_FLOOD) + +int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, + }; + int err; + + if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD) + return 0; + + err = switchdev_port_attr_get(p->dev, &attr); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + /* Check if specific bridge flag attribute offload is supported */ + if (!(attr.u.brport_flags_support & mask)) { + br_warn(p->br, "bridge flag offload is not supported %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + return -EOPNOTSUPP; + } + + attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS; + attr.flags = SWITCHDEV_F_DEFER; + attr.u.brport_flags = flags; + err = switchdev_port_attr_set(p->dev, &attr); + if (err) { + br_warn(p->br, "error setting offload flag on port %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + return err; + } + + return 0; +} + +static void +br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, + u16 vid, struct net_device *dev) +{ + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + info.addr = mac; + info.vid = vid; + notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; + call_switchdev_notifiers(notifier_type, dev, &info.info); +} + +void +br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) +{ + if (!fdb->added_by_user) + return; + + switch (type) { + case RTM_DELNEIGH: + br_switchdev_fdb_call_notifiers(false, fdb->addr.addr, + fdb->vlan_id, + fdb->dst->dev); + break; + case RTM_NEWNEIGH: + br_switchdev_fdb_call_notifiers(true, fdb->addr.addr, + fdb->vlan_id, + fdb->dst->dev); + break; + } +} diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index b838213c408e..26a1a56639b2 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -706,6 +706,14 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) return __br_vlan_filter_toggle(br, val); } +bool br_vlan_enabled(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return !!br->vlan_enabled; +} +EXPORT_SYMBOL_GPL(br_vlan_enabled); + int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { int err = 0; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 346ef6b00b8f..c16dd3a47fc6 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -111,7 +111,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net, __wsum csum; u8 proto; - if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) + if (!nft_bridge_iphdr_validate(oldskb)) return; /* IP header checks: fragment. 
*/ @@ -226,9 +226,6 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) __be16 fo; u8 proto = ip6h->nexthdr; - if (skb->csum_bad) - return false; - if (skb_csum_unnecessary(skb)) return true; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 21f18ea2fce4..7506b853a84d 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1103,7 +1103,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, } -static struct net_proto_family caif_family_ops = { +static const struct net_proto_family caif_family_ops = { .family = PF_CAIF, .create = caif_create, .owner = THIS_MODULE, diff --git a/net/core/datagram.c b/net/core/datagram.c index db1866f2ffcf..e5311a7c70da 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -161,6 +161,45 @@ done: return skb; } +struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, + struct sk_buff_head *queue, + unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), + int *peeked, int *off, int *err, + struct sk_buff **last) +{ + struct sk_buff *skb; + int _off = *off; + + *last = queue->prev; + skb_queue_walk(queue, skb) { + if (flags & MSG_PEEK) { + if (_off >= skb->len && (skb->len || _off || + skb->peeked)) { + _off -= skb->len; + continue; + } + if (!skb->len) { + skb = skb_set_peeked(skb); + if (unlikely(IS_ERR(skb))) { + *err = PTR_ERR(skb); + return NULL; + } + } + *peeked = 1; + atomic_inc(&skb->users); + } else { + __skb_unlink(skb, queue); + if (destructor) + destructor(sk, skb); + } + *off = _off; + return skb; + } + return NULL; +} + /** * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -222,40 +261,14 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 
8) */ - int _off = *off; - - *last = (struct sk_buff *)queue; spin_lock_irqsave(&queue->lock, cpu_flags); - skb_queue_walk(queue, skb) { - *last = skb; - if (flags & MSG_PEEK) { - if (_off >= skb->len && (skb->len || _off || - skb->peeked)) { - _off -= skb->len; - continue; - } - if (!skb->len) { - skb = skb_set_peeked(skb); - if (IS_ERR(skb)) { - error = PTR_ERR(skb); - spin_unlock_irqrestore(&queue->lock, - cpu_flags); - goto no_packet; - } - } - *peeked = 1; - atomic_inc(&skb->users); - } else { - __skb_unlink(skb, queue); - if (destructor) - destructor(sk, skb); - } - spin_unlock_irqrestore(&queue->lock, cpu_flags); - *off = _off; - return skb; - } - + skb = __skb_try_recv_from_queue(sk, queue, flags, destructor, + peeked, off, &error, last); spin_unlock_irqrestore(&queue->lock, cpu_flags); + if (error) + goto no_packet; + if (skb) + return skb; if (!sk_can_busy_loop(sk)) break; @@ -317,9 +330,7 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) { bool slow; - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) { + if (!skb_unref(skb)) { sk_peek_offset_bwd(sk, len); return; } @@ -335,8 +346,8 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) } EXPORT_SYMBOL(__skb_free_datagram_locked); -int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, - unsigned int flags, +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, + struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb)) { @@ -344,15 +355,15 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, if (flags & MSG_PEEK) { err = -ENOENT; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); + spin_lock_bh(&sk_queue->lock); + if (skb == skb_peek(sk_queue)) { + __skb_unlink(skb, sk_queue); atomic_dec(&skb->users); if (destructor) destructor(sk, skb); err = 0; } - spin_unlock_bh(&sk->sk_receive_queue.lock); + spin_unlock_bh(&sk_queue->lock); } atomic_inc(&sk->sk_drops); @@ -383,7 +394,8 @@ EXPORT_SYMBOL(__sk_queue_drop_skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { - int err = __sk_queue_drop_skb(sk, skb, flags, NULL); + int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags, + NULL); kfree_skb(skb); sk_mem_reclaim_partial(sk); diff --git a/net/core/dev.c b/net/core/dev.c index 6d60149287a1..8658074ecad6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -105,6 +105,7 @@ #include <net/dst.h> #include <net/dst_metadata.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/checksum.h> #include <net/xfrm.h> #include <linux/highmem.h> @@ -142,6 +143,7 @@ #include <linux/hrtimer.h> #include <linux/netfilter_ingress.h> #include <linux/crash_dump.h> +#include <linux/sctp.h> #include "net-sysfs.h" @@ -161,6 +163,7 @@ static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, struct netdev_notifier_info *info); +static struct napi_struct *napi_by_id(unsigned int napi_id); /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -865,6 +868,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) EXPORT_SYMBOL(dev_get_by_index); /** + * dev_get_by_napi_id - find a device by napi_id + * @napi_id: ID of the NAPI struct + * + * Search for an interface by NAPI ID. 
Returns %NULL if the device + * is not found or a pointer to the device. The device has not had + * its reference counter increased so the caller must be careful + * about locking. The caller must hold RCU lock. + */ + +struct net_device *dev_get_by_napi_id(unsigned int napi_id) +{ + struct napi_struct *napi; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (napi_id < MIN_NAPI_ID) + return NULL; + + napi = napi_by_id(napi_id); + + return napi ? napi->dev : NULL; +} +EXPORT_SYMBOL(dev_get_by_napi_id); + +/** * netdev_get_name - get a netdevice name, knowing its ifindex. * @net: network namespace * @name: a pointer to the buffer where the name will be stored. @@ -2612,6 +2640,47 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +int skb_crc32c_csum_help(struct sk_buff *skb) +{ + __le32 crc32c_csum; + int ret = 0, offset, start; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + goto out; + + if (unlikely(skb_is_gso(skb))) + goto out; + + /* Before computing a checksum, we should make sure no frag could + * be modified by an external entity : checksum could be wrong. + */ + if (unlikely(skb_has_shared_frag(skb))) { + ret = __skb_linearize(skb); + if (ret) + goto out; + } + start = skb_checksum_start_offset(skb); + offset = start + offsetof(struct sctphdr, checksum); + if (WARN_ON_ONCE(offset >= skb_headlen(skb))) { + ret = -EINVAL; + goto out; + } + if (skb_cloned(skb) && + !skb_clone_writable(skb, offset + sizeof(__le32))) { + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (ret) + goto out; + } + crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, + skb->len - start, ~(__u32)0, + crc32c_csum_stub)); + *(__le32 *)(skb->data + offset) = crc32c_csum; + skb->ip_summed = CHECKSUM_NONE; + skb->csum_not_inet = 0; +out: + return ret; +} + __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; @@ -2954,6 +3023,17 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, return skb; } +int skb_csum_hwoffload_help(struct sk_buff *skb, + const netdev_features_t features) +{ + if (unlikely(skb->csum_not_inet)) + return !!(features & NETIF_F_SCTP_CRC) ? 0 : + skb_crc32c_csum_help(skb); + + return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb); +} +EXPORT_SYMBOL(skb_csum_hwoffload_help); + static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) { netdev_features_t features; @@ -2992,8 +3072,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device else skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!(features & NETIF_F_CSUM_MASK) && - skb_checksum_help(skb)) + if (skb_csum_hwoffload_help(skb, features)) goto out_kfree_skb; } } @@ -3179,7 +3258,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. 
*/ qdisc_bstats_cpu_update(cl->q, skb); - switch (tc_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, cl, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -3191,6 +3270,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *ret = NET_XMIT_SUCCESS; consume_skb(skb); return NULL; @@ -3949,7 +4029,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, skb->tc_at_ingress = 1; qdisc_bstats_cpu_update(cl->q, skb); - switch (tc_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, cl, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -3960,6 +4040,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: consume_skb(skb); return NULL; case TC_ACT_REDIRECT: @@ -4637,9 +4718,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (netif_elide_gro(skb->dev)) goto normal; - if (skb->csum_bad) - goto normal; - gro_list_prepare(napi, skb); rcu_read_lock(); @@ -7015,7 +7093,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, GFP_KERNEL); /* diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index b94b1d293506..77f04e71100f 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -225,6 +225,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: rx_filter_valid = 1; break; } diff --git a/net/core/filter.c b/net/core/filter.c index a6bb95fa87b2..60ed6f343a63 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -352,7 +352,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * bpf_convert_filter - convert filter program * @prog: the user passed filter program * @len: the length of the user passed filter program - * @new_prog: buffer where converted program will be stored + * @new_prog: allocated 'struct bpf_prog' or NULL * @new_len: pointer to store length of converted program * * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' @@ -364,14 +364,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * * 2) 2nd pass to remap in two passes: 1st pass finds new * jump offsets, 2nd pass remapping: - * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); */ static int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len) + struct bpf_prog *new_prog, int *new_len) { - int new_flen = 0, pass = 0, target, i; - struct bpf_insn *new_insn; + int new_flen = 0, pass = 0, target, i, stack_off; + struct bpf_insn *new_insn, *first_insn = NULL; struct sock_filter *fp; int *addrs = NULL; u8 bpf_src; @@ -383,6 +382,7 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, return -EINVAL; if (new_prog) { + first_insn = new_prog->insnsi; addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL | __GFP_NOWARN); if (!addrs) @@ -390,11 +390,11 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, } do_pass: - new_insn = 
new_prog; + new_insn = first_insn; fp = prog; /* Classic BPF related prologue emission. */ - if (new_insn) { + if (new_prog) { /* Classic BPF expects A and X to be reset first. These need * to be guaranteed to be the first two instructions. */ @@ -415,7 +415,7 @@ do_pass: struct bpf_insn *insn = tmp_insns; if (addrs) - addrs[i] = new_insn - new_prog; + addrs[i] = new_insn - first_insn; switch (fp->code) { /* All arithmetic insns and skb loads map as-is. */ @@ -561,17 +561,25 @@ do_pass: /* Store to stack. */ case BPF_ST: case BPF_STX: + stack_off = fp->k * 4 + 4; *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == BPF_ST ? BPF_REG_A : BPF_REG_X, - -(BPF_MEMWORDS - fp->k) * 4); + -stack_off); + /* check_load_and_stores() verifies that classic BPF can + * load from stack only after write, so tracking + * stack_depth for ST|STX insns is enough + */ + if (new_prog && new_prog->aux->stack_depth < stack_off) + new_prog->aux->stack_depth = stack_off; break; /* Load from stack. */ case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: + stack_off = fp->k * 4 + 4; *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? BPF_REG_A : BPF_REG_X, BPF_REG_FP, - -(BPF_MEMWORDS - fp->k) * 4); + -stack_off); break; /* A = K or X = K */ @@ -619,13 +627,13 @@ do_pass: if (!new_prog) { /* Only calculating new length. */ - *new_len = new_insn - new_prog; + *new_len = new_insn - first_insn; return 0; } pass++; - if (new_flen != new_insn - new_prog) { - new_flen = new_insn - new_prog; + if (new_flen != new_insn - first_insn) { + new_flen = new_insn - first_insn; if (pass > 2) goto err; goto do_pass; @@ -1017,7 +1025,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) fp->len = new_len; /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ - err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len); + err = bpf_convert_filter(old_prog, old_len, fp, &new_len); if (err) /* 2nd bpf_convert_filter() can fail only if it fails * to allocate memory, remapping must succeed. Note, @@ -1866,6 +1874,24 @@ static const struct bpf_func_proto bpf_set_hash_invalid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash) +{ + /* Set user specified hash as L4(+), so that it gets returned + * on skb_get_hash() call unless BPF prog later on triggers a + * skb_clear_hash(). 
+ */ + __skb_set_sw_hash(skb, hash, true); + return 0; +} + +static const struct bpf_func_proto bpf_set_hash_proto = { + .func = bpf_set_hash, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, u16, vlan_tci) { @@ -2736,6 +2762,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_hash_recalc_proto; case BPF_FUNC_set_hash_invalid: return &bpf_set_hash_invalid_proto; + case BPF_FUNC_set_hash: + return &bpf_set_hash_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: @@ -2767,12 +2795,6 @@ xdp_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * -cg_skb_func_proto(enum bpf_func_id func_id) -{ - return sk_filter_func_proto(func_id); -} - -static const struct bpf_func_proto * lwt_inout_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -2834,7 +2856,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id) } } -static bool __is_valid_access(int off, int size) +static bool __is_valid_access(int off, int size, enum bpf_access_type type, + int *ctx_field_size) { if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -2850,9 +2873,27 @@ static bool __is_valid_access(int off, int size) offsetof(struct __sk_buff, cb[4]) + sizeof(__u32)) return false; break; - default: + case offsetof(struct __sk_buff, data) ... + offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, data_end) ... + offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: if (size != sizeof(__u32)) return false; + break; + default: + /* permit narrower load for not cb/data/data_end fields */ + *ctx_field_size = 4; + if (type == BPF_WRITE) { + if (size != sizeof(__u32)) + return false; + } else { + if (size != sizeof(__u32)) +#ifdef __LITTLE_ENDIAN + return (off & 0x3) == 0 && (size == 1 || size == 2); +#else + return (off & 0x3) + size == 4 && (size == 1 || size == 2); +#endif + } } return true; @@ -2860,12 +2901,16 @@ static bool __is_valid_access(int off, int size) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { switch (off) { - case offsetof(struct __sk_buff, tc_classid): - case offsetof(struct __sk_buff, data): - case offsetof(struct __sk_buff, data_end): + case offsetof(struct __sk_buff, tc_classid) ... + offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, data) ... + offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, data_end) ... + offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: return false; } @@ -2879,15 +2924,17 @@ static bool sk_filter_is_valid_access(int off, int size, } } - return __is_valid_access(off, size); + return __is_valid_access(off, size, type, ctx_field_size); } static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { switch (off) { - case offsetof(struct __sk_buff, tc_classid): + case offsetof(struct __sk_buff, tc_classid) ... 
+ offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: return false; } @@ -2912,12 +2959,13 @@ static bool lwt_is_valid_access(int off, int size, break; } - return __is_valid_access(off, size); + return __is_valid_access(off, size, type, ctx_field_size); } static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { if (type == BPF_WRITE) { switch (off) { @@ -2980,7 +3028,8 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { if (type == BPF_WRITE) { switch (off) { @@ -3005,7 +3054,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, break; } - return __is_valid_access(off, size); + return __is_valid_access(off, size, type, ctx_field_size); } static bool __is_valid_xdp_access(int off, int size) @@ -3022,7 +3071,8 @@ static bool __is_valid_xdp_access(int off, int size) static bool xdp_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { if (type == BPF_WRITE) return false; @@ -3336,7 +3386,7 @@ const struct bpf_verifier_ops xdp_prog_ops = { }; const struct bpf_verifier_ops cg_skb_prog_ops = { - .get_func_proto = cg_skb_func_proto, + .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .test_run = bpf_prog_test_run_skb, diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 28d94bce4df8..fc5fc4594c90 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -18,6 +18,7 @@ #include <linux/stddef.h> #include <linux/if_ether.h> #include <linux/mpls.h> +#include <linux/tcp.h> #include <net/flow_dissector.h> #include <scsi/fc/fc_fcoe.h> @@ -342,6 +343,64 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, return FLOW_DISSECT_RET_OUT_PROTO_AGAIN; } +static void +__skb_flow_dissect_tcp(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, int thoff, int hlen) +{ + struct flow_dissector_key_tcp *key_tcp; + struct tcphdr *th, _th; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP)) + return; + + th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th); + if (!th) + return; + + if (unlikely(__tcp_hdrlen(th) < sizeof(_th))) + return; + + key_tcp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_TCP, + target_container); + key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF)); +} + +static void +__skb_flow_dissect_ipv4(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, const struct iphdr *iph) +{ + struct flow_dissector_key_ip *key_ip; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) + return; + + key_ip = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IP, + target_container); + key_ip->tos = iph->tos; + key_ip->ttl = iph->ttl; +} + +static void +__skb_flow_dissect_ipv6(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, const struct ipv6hdr *iph) +{ + struct flow_dissector_key_ip *key_ip; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) + return; + + key_ip = skb_flow_dissector_target(flow_dissector, + 
FLOW_DISSECTOR_KEY_IP, + target_container); + key_ip->tos = ipv6_get_dsfield(iph); + key_ip->ttl = iph->hop_limit; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified @@ -444,6 +503,9 @@ ip: } } + __skb_flow_dissect_ipv4(skb, flow_dissector, + target_container, data, iph); + if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) goto out_good; @@ -489,6 +551,9 @@ ipv6: goto out_good; } + __skb_flow_dissect_ipv6(skb, flow_dissector, + target_container, data, iph); + if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) goto out_good; @@ -683,6 +748,10 @@ ip_proto_again: case IPPROTO_MPLS: proto = htons(ETH_P_MPLS_UC); goto mpls; + case IPPROTO_TCP: + __skb_flow_dissect_tcp(skb, flow_dissector, target_container, + data, nhoff, hlen); + break; default: break; } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index b3bc0a31af9f..1307731ddfe4 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -240,7 +240,8 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = { static int bpf_build_state(struct nlattr *nla, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct nlattr *tb[LWT_BPF_MAX + 1]; struct lwtunnel_state *newts; @@ -250,7 +251,7 @@ static int bpf_build_state(struct nlattr *nla, if (family != AF_INET && family != AF_INET6) return -EAFNOSUPPORT; - ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL); + ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack); if (ret < 0) return ret; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index cfae3d5fe11f..d9cb3532f1dd 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -103,37 +103,53 @@ EXPORT_SYMBOL(lwtunnel_encap_del_ops); int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, - const void *cfg, struct lwtunnel_state **lws) + const void *cfg, struct lwtunnel_state **lws, + struct netlink_ext_ack *extack) { const struct lwtunnel_encap_ops *ops; + bool found = false; int ret = -EINVAL; if (encap_type == LWTUNNEL_ENCAP_NONE || - encap_type > LWTUNNEL_ENCAP_MAX) + encap_type > LWTUNNEL_ENCAP_MAX) { + NL_SET_ERR_MSG_ATTR(extack, encap, + "Unknown LWT encapsulation type"); return ret; + } ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); if (likely(ops && ops->build_state && try_module_get(ops->owner))) { - ret = ops->build_state(encap, family, cfg, lws); + found = true; + ret = ops->build_state(encap, family, cfg, lws, extack); if (ret) module_put(ops->owner); } rcu_read_unlock(); + /* don't rely on -EOPNOTSUPP to detect match as build_state + * handlers could return it + */ + if (!found) { + NL_SET_ERR_MSG_ATTR(extack, encap, + "LWT encapsulation type not supported"); + } + return ret; } EXPORT_SYMBOL(lwtunnel_build_state); -int lwtunnel_valid_encap_type(u16 encap_type) +int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) { const struct lwtunnel_encap_ops *ops; int ret = -EINVAL; if (encap_type == LWTUNNEL_ENCAP_NONE || - encap_type > LWTUNNEL_ENCAP_MAX) + encap_type > LWTUNNEL_ENCAP_MAX) { + NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); return ret; + } rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); @@ -153,11 +169,16 @@ int lwtunnel_valid_encap_type(u16 encap_type) } } #endif - return ops ? 0 : -EOPNOTSUPP; + ret = ops ? 
0 : -EOPNOTSUPP; + if (ret < 0) + NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); + + return ret; } EXPORT_SYMBOL(lwtunnel_valid_encap_type); -int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, + struct netlink_ext_ack *extack) { struct rtnexthop *rtnh = (struct rtnexthop *)attr; struct nlattr *nla_entype; @@ -174,7 +195,8 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) if (nla_entype) { encap_type = nla_get_u16(nla_entype); - if (lwtunnel_valid_encap_type(encap_type) != 0) + if (lwtunnel_valid_encap_type(encap_type, + extack) != 0) return -EOPNOTSUPP; } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d274f81fcc2c..dadb5eef91c3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -118,6 +118,50 @@ unsigned long neigh_rand_reach_time(unsigned long base) EXPORT_SYMBOL(neigh_rand_reach_time); +static bool neigh_del(struct neighbour *n, __u8 state, + struct neighbour __rcu **np, struct neigh_table *tbl) +{ + bool retval = false; + + write_lock(&n->lock); + if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & state)) { + struct neighbour *neigh; + + neigh = rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock)); + rcu_assign_pointer(*np, neigh); + n->dead = 1; + retval = true; + } + write_unlock(&n->lock); + if (retval) + neigh_cleanup_and_release(n); + return retval; +} + +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) +{ + struct neigh_hash_table *nht; + void *pkey = ndel->primary_key; + u32 hash_val; + struct neighbour *n; + struct neighbour __rcu **np; + + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); + hash_val = hash_val >> (32 - nht->hash_shift); + + np = &nht->hash_buckets[hash_val]; + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock)))) { + if (n == ndel) + return neigh_del(n, 0, np, tbl); + np = &n->next; + } + return false; +} + static int neigh_forced_gc(struct neigh_table *tbl) { int shrunk = 0; @@ -140,19 +184,10 @@ static int neigh_forced_gc(struct neigh_table *tbl) * - nobody refers to it. 
* - it is not permanent */ - write_lock(&n->lock); - if (atomic_read(&n->refcnt) == 1 && - !(n->nud_state & NUD_PERMANENT)) { - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); - n->dead = 1; - shrunk = 1; - write_unlock(&n->lock); - neigh_cleanup_and_release(n); + if (neigh_del(n, NUD_PERMANENT, np, tbl)) { + shrunk = 1; continue; } - write_unlock(&n->lock); np = &n->next; } } @@ -1649,7 +1684,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, NETLINK_CB(skb).portid); + write_lock_bh(&tbl->lock); neigh_release(neigh); + neigh_remove_one(neigh, tbl); + write_unlock_bh(&tbl->lock); out: return err; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 14d09345f00d..4847964931df 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -363,15 +363,10 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { - int i; - - seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, - dev->name, ha->refcount, ha->global_use); - - for (i = 0; i < dev->addr_len; i++) - seq_printf(seq, "%02x", ha->addr[i]); - - seq_putc(seq, '\n'); + seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", + dev->ifindex, dev->name, + ha->refcount, ha->global_use, + (int)dev->addr_len, ha->addr); } netif_addr_unlock_bh(dev); return 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 65ea0ff4017c..58e6cc70500d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -323,7 +323,11 @@ NETDEVICE_SHOW_RW(flags, fmt_hex); static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) { - int res, orig_len = dev->tx_queue_len; + unsigned int orig_len = dev->tx_queue_len; + int res; + + if (new_len != (unsigned int)new_len) + return -ERANGE; if (new_len != orig_len) { dev->tx_queue_len = new_len; @@ -349,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } -NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong); +NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 26bbfababff2..2178db8e47cd 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -596,6 +596,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + struct nlattr *nla; struct net *peer; int nsid, err; @@ -603,23 +604,35 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, rtnl_net_policy, extack); if (err < 0) return err; - if (!tb[NETNSA_NSID]) + if (!tb[NETNSA_NSID]) { + NL_SET_ERR_MSG(extack, "nsid is missing"); return -EINVAL; + } nsid = nla_get_s32(tb[NETNSA_NSID]); - if (tb[NETNSA_PID]) + if (tb[NETNSA_PID]) { peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); - else if (tb[NETNSA_FD]) + nla = tb[NETNSA_PID]; + } else if (tb[NETNSA_FD]) { peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); - else + nla = tb[NETNSA_FD]; + } else { + NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); return -EINVAL; - if (IS_ERR(peer)) + } + if (IS_ERR(peer)) { + NL_SET_BAD_ATTR(extack, nla); + NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); return PTR_ERR(peer); + } spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { spin_unlock_bh(&net->nsid_lock); err = -EEXIST; + 
NL_SET_BAD_ATTR(extack, nla); + NL_SET_ERR_MSG(extack, + "Peer netns already has a nsid assigned"); goto out; } @@ -628,6 +641,10 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; + } else if (err == -ENOSPC && nsid >= 0) { + err = -EEXIST; + NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]); + NL_SET_ERR_MSG(extack, "The specified nsid is already used"); } out: put_net(peer); @@ -670,6 +687,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + struct nlattr *nla; struct sk_buff *msg; struct net *peer; int err, id; @@ -678,15 +696,22 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, rtnl_net_policy, extack); if (err < 0) return err; - if (tb[NETNSA_PID]) + if (tb[NETNSA_PID]) { peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); - else if (tb[NETNSA_FD]) + nla = tb[NETNSA_PID]; + } else if (tb[NETNSA_FD]) { peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); - else + nla = tb[NETNSA_FD]; + } else { + NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); return -EINVAL; + } - if (IS_ERR(peer)) + if (IS_ERR(peer)) { + NL_SET_BAD_ATTR(extack, nla); + NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); return PTR_ERR(peer); + } msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); if (!msg) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5e61456f6bc7..2769ad9834d1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -941,6 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size() /* IFLA_XDP */ + + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1283,9 +1284,40 @@ err_cancel: return err; } +static u32 rtnl_get_event(unsigned long event) +{ + u32 rtnl_event_type = IFLA_EVENT_NONE; + + switch (event) { + case NETDEV_REBOOT: + rtnl_event_type = IFLA_EVENT_REBOOT; + break; + case NETDEV_FEAT_CHANGE: + rtnl_event_type = IFLA_EVENT_FEATURES; + break; + case NETDEV_BONDING_FAILOVER: + rtnl_event_type = IFLA_EVENT_BONDING_FAILOVER; + break; + case NETDEV_NOTIFY_PEERS: + rtnl_event_type = IFLA_EVENT_NOTIFY_PEERS; + break; + case NETDEV_RESEND_IGMP: + rtnl_event_type = IFLA_EVENT_IGMP_RESEND; + break; + case NETDEV_CHANGEINFODATA: + rtnl_event_type = IFLA_EVENT_BONDING_OPTIONS; + break; + default: + break; + } + + return rtnl_event_type; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags, u32 ext_filter_mask) + unsigned int flags, u32 ext_filter_mask, + u32 event) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1334,6 +1366,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) goto nla_put_failure; + if (event != IFLA_EVENT_NONE) { + if (nla_put_u32(skb, IFLA_EVENT, event)) + goto nla_put_failure; + } + if (rtnl_fill_link_ifmap(skb, dev)) goto nla_put_failure; @@ -1468,6 +1505,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, [IFLA_PROTO_DOWN] = { .type = NLA_U8 }, [IFLA_XDP] = { .type = NLA_NESTED }, + [IFLA_EVENT] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1627,7 +1665,7 @@ static int 
rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask); + ext_filter_mask, 0); if (err < 0) { if (likely(skb->len)) @@ -2049,8 +2087,8 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_TXQLEN]) { - unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]); - unsigned long orig_len = dev->tx_queue_len; + unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); + unsigned int orig_len = dev->tx_queue_len; if (dev->tx_queue_len ^ value) { dev->tx_queue_len = value; @@ -2737,7 +2775,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask); + nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -2809,7 +2847,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) } struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned int change, gfp_t flags) + unsigned int change, + u32 event, gfp_t flags) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2820,7 +2859,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2841,18 +2880,25 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); } -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, - gfp_t flags) +static void rtmsg_ifinfo_event(int type, struct net_device *dev, + unsigned int change, u32 event, + gfp_t flags) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, flags); + skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } + +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, + gfp_t flags) +{ + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags); +} EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, @@ -4169,7 +4215,8 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_NOTIFY_PEERS: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); + rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), + GFP_KERNEL); break; default: break; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index ae35cce3a40d..7232274de334 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -51,7 +51,8 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +u32 secure_tcpv6_ts_off(const struct net *net, + const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; @@ -61,7 +62,7 @@ u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) .daddr = *(struct in6_addr *)daddr, }; - if (sysctl_tcp_timestamps != 1) + if (net->ipv4.sysctl_tcp_timestamps != 1) return 0; ts_secret_init(); @@ -113,9 +114,9 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -u32 secure_tcp_ts_off(__be32 saddr, __be32 
daddr) +u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (sysctl_tcp_timestamps != 1) + if (net->ipv4.sysctl_tcp_timestamps != 1) return 0; ts_secret_init(); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b1be7c01efe2..c4d2c1f824bb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -643,12 +643,10 @@ fastpath: kmem_cache_free(skbuff_fclone_cache, fclones); } -static void skb_release_head_state(struct sk_buff *skb) +void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); -#ifdef CONFIG_XFRM - secpath_put(skb->sp); -#endif + secpath_reset(skb); if (skb->destructor) { WARN_ON(in_irq()); skb->destructor(skb); @@ -694,12 +692,9 @@ EXPORT_SYMBOL(__kfree_skb); */ void kfree_skb(struct sk_buff *skb) { - if (unlikely(!skb)) - return; - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + if (!skb_unref(skb)) return; + trace_kfree_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } @@ -746,17 +741,32 @@ EXPORT_SYMBOL(skb_tx_error); */ void consume_skb(struct sk_buff *skb) { - if (unlikely(!skb)) - return; - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + if (!skb_unref(skb)) return; + trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); +/** + * consume_stateless_skb - free an skbuff, assuming it is stateless + * @skb: buffer to free + * + * Works like consume_skb(), but this variant assumes that all the head + * states have been already dropped. + */ +void consume_stateless_skb(struct sk_buff *skb) +{ + if (!skb_unref(skb)) + return; + + trace_consume_skb(skb); + if (likely(skb->head)) + skb_release_data(skb); + kfree_skbmem(skb); +} + void __kfree_skb_flush(void) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -807,10 +817,9 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + if (!skb_unref(skb)) return; + /* if reaching here SKB is ready to free */ trace_consume_skb(skb); @@ -2243,6 +2252,32 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_csum_bits); +static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum) +{ + net_warn_ratelimited( + "%s: attempt to compute crc32c without libcrc32c.ko\n", + __func__); + return 0; +} + +static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2, + int offset, int len) +{ + net_warn_ratelimited( + "%s: attempt to compute crc32c without libcrc32c.ko\n", + __func__); + return 0; +} + +static const struct skb_checksum_ops default_crc32c_ops = { + .update = warn_crc32c_csum_update, + .combine = warn_crc32c_csum_combine, +}; + +const struct skb_checksum_ops *crc32c_csum_stub __read_mostly = + &default_crc32c_ops; +EXPORT_SYMBOL(crc32c_csum_stub); + /** * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() * @from: source buffer @@ -2620,7 +2655,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags & + SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. 
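The kfree_skb(), consume_skb() and napi_consume_skb() rewrites above all funnel through one shared refcount test. A sketch of what that helper amounts to, reconstructed from the removed open-coded sequences (its exact form and placement in skbuff.h are assumed, not shown by this diff):

#include <linux/skbuff.h>

/* Reconstructed shared test: returns true when the caller held the
 * last reference and must actually free the skb.  The smp_rmb() on
 * the single-user fast path pairs with the barrier implied by
 * atomic_dec_and_test() on the contended path.
 */
static inline bool skb_unref(struct sk_buff *skb)
{
	if (unlikely(!skb))
		return false;
	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return false;

	return true;
}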
*/ @@ -3235,8 +3271,8 @@ normal: skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & - SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { @@ -3482,24 +3518,18 @@ void __init skb_init(void) NULL); } -/** - * skb_to_sgvec - Fill a scatter-gather list from a socket buffer - * @skb: Socket buffer containing the buffers to be mapped - * @sg: The scatter-gather list to map into - * @offset: The offset into the buffer's contents to start mapping - * @len: Length of buffer space to be mapped - * - * Fill the specified scatter-gather list with mappings/pointers into a - * region of the buffer space attached to a socket buffer. - */ static int -__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, + unsigned int recursion_level) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int elt = 0; + if (unlikely(recursion_level >= 24)) + return -EMSGSIZE; + if (copy > 0) { if (copy > len) copy = len; @@ -3518,6 +3548,8 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (unlikely(elt && sg_is_last(&sg[elt - 1]))) + return -EMSGSIZE; if (copy > len) copy = len; @@ -3532,16 +3564,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) } skb_walk_frags(skb, frag_iter) { - int end; + int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { + if (unlikely(elt && sg_is_last(&sg[elt - 1]))) + return -EMSGSIZE; + if (copy > len) copy = len; - elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start, - copy); + ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start, + copy, recursion_level + 1); + if (unlikely(ret < 0)) + return ret; + elt += ret; if ((len -= copy) == 0) return elt; offset += copy; @@ -3552,6 +3590,31 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) return elt; } +/** + * skb_to_sgvec - Fill a scatter-gather list from a socket buffer + * @skb: Socket buffer containing the buffers to be mapped + * @sg: The scatter-gather list to map into + * @offset: The offset into the buffer's contents to start mapping + * @len: Length of buffer space to be mapped + * + * Fill the specified scatter-gather list with mappings/pointers into a + * region of the buffer space attached to a socket buffer. Returns either + * the number of scatterlist items used, or -EMSGSIZE if the contents + * could not fit. + */ +int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +{ + int nsg = __skb_to_sgvec(skb, sg, offset, len, 0); + + if (nsg <= 0) + return nsg; + + sg_mark_end(&sg[nsg - 1]); + + return nsg; +} +EXPORT_SYMBOL_GPL(skb_to_sgvec); + /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps skb to the given * sglist without marking the sg which contains the last skb data as the end. 
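Since skb_to_sgvec() can now fail, callers have to check for a negative return before touching the scatterlist. A minimal hypothetical caller (the function name and the hand-off comment are illustrative, not from this patch):

#include <linux/skbuff.h>
#include <linux/scatterlist.h>

/* Hypothetical caller of the error-aware skb_to_sgvec(): the return
 * value is the number of scatterlist entries filled, or a negative
 * errno (-EMSGSIZE) when the skb's data cannot fit into 'sg'.
 */
static int map_skb_to_sg(struct sk_buff *skb, struct scatterlist *sg,
			 int nents)
{
	int nsg;

	sg_init_table(sg, nents);
	nsg = skb_to_sgvec(skb, sg, 0, skb->len);
	if (nsg < 0)
		return nsg;

	/* ... hand 'sg' with 'nsg' entries to crypto or DMA code ... */
	return nsg;
}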
* So the caller can manipulate the sg list at will when padding new data after @@ -3574,19 +3637,11 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { - return __skb_to_sgvec(skb, sg, offset, len); + return __skb_to_sgvec(skb, sg, offset, len, 0); } EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int nsg = __skb_to_sgvec(skb, sg, offset, len); - sg_mark_end(&sg[nsg - 1]); - - return nsg; -} -EXPORT_SYMBOL_GPL(skb_to_sgvec); /** * skb_cow_data - Check that a socket buffer's data buffers are writable @@ -3878,6 +3933,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (!sk) return; + if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && + skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) + return; + tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!skb_may_tx_timestamp(sk, tsonly)) return; @@ -3899,7 +3958,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, return; if (tsonly) { - skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags & + SKBTX_ANY_TSTAMP; skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; } diff --git a/net/core/sock.c b/net/core/sock.c index 727f924b7f91..ad8a4bc84126 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1038,6 +1038,10 @@ set_rcvbuf: #endif case SO_MAX_PACING_RATE: + if (val != ~0U) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); sk->sk_max_pacing_rate = val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); @@ -2072,6 +2076,26 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, } EXPORT_SYMBOL(sock_cmsg_send); +static void sk_enter_memory_pressure(struct sock *sk) +{ + if (!sk->sk_prot->enter_memory_pressure) + return; + + sk->sk_prot->enter_memory_pressure(sk); +} + +static void sk_leave_memory_pressure(struct sock *sk) +{ + if (sk->sk_prot->leave_memory_pressure) { + sk->sk_prot->leave_memory_pressure(sk); + } else { + unsigned long *memory_pressure = sk->sk_prot->memory_pressure; + + if (memory_pressure && *memory_pressure) + *memory_pressure = 0; + } +} + /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 93106120f987..733f523707ac 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -178,10 +178,6 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = { [DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)}, }; -static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { - [DCB_ATTR_IEEE_APP] = {.len = sizeof(struct dcb_app)}, -}; - /* DCB number of traffic classes nested attributes. 
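With the fixed-length dcbnl_ieee_app policy entry removed above, the length of each DCB_ATTR_IEEE_APP attribute must now be validated by hand, as dcbnl_ieee_set() does below. The general pattern, as a sketch with a hypothetical helper name:

#include <linux/dcbnl.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <net/netlink.h>

/* Sketch of manual nested-attribute validation: when no nla_policy
 * entry enforces a minimum payload, the handler must reject short
 * attributes itself before treating nla_data() as a fixed-size struct.
 */
static int parse_app_entry(const struct nlattr *attr, struct dcb_app *app)
{
	if (nla_len(attr) < sizeof(struct dcb_app))
		return -ERANGE;

	memcpy(app, nla_data(attr), sizeof(struct dcb_app));
	return 0;
}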
*/ static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = { [DCB_FEATCFG_ATTR_ALL] = {.type = NLA_FLAG}, @@ -1463,8 +1459,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { struct dcb_app *app_data; + if (nla_type(attr) != DCB_ATTR_IEEE_APP) continue; + + if (nla_len(attr) < sizeof(struct dcb_app)) { + err = -ERANGE; + goto err; + } + app_data = nla_data(attr); if (ops->ieee_setapp) err = ops->ieee_setapp(netdev, app_data); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 5e3a7302f774..e1295d5f2c56 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -233,7 +233,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - const u32 now = ccid2_time_stamp; + const u32 now = ccid2_jiffies32; struct ccid2_seq *next; /* slow-start after idle periods (RFC 2581, RFC 2861) */ @@ -466,7 +466,7 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, * The cleanest solution is to not use the ccid2s_sent field at all * and instead use DCCP timestamps: requires changes in other places. */ - ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent); + ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent); } static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) @@ -478,7 +478,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) return; } - hc->tx_last_cong = ccid2_time_stamp; + hc->tx_last_cong = ccid2_jiffies32; hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); @@ -731,7 +731,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rto = DCCP_TIMEOUT_INIT; hc->tx_rpdupack = -1; - hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp; + hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32; hc->tx_cwnd_used = 0; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 18c97543e522..6e50ef2898fb 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -27,7 +27,7 @@ * CCID-2 timestamping faces the same issues as TCP timestamping. * Hence we reuse/share as much of the code as possible. */ -#define ccid2_time_stamp tcp_time_stamp +#define ccid2_jiffies32 ((u32)jiffies) /* NUMDUPACK parameter from RFC 4341, p. 
6 */ #define NUMDUPACK 3 diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 405483a07efc..73a0399dc7a2 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -447,7 +447,7 @@ static void dn_destruct(struct sock *sk) dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); } -static int dn_memory_pressure; +static unsigned long dn_memory_pressure; static void dn_enter_memory_pressure(struct sock *sk) { diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 81a0868edb1d..cc5f8f971689 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -25,16 +25,19 @@ config NET_DSA_TAG_DSA config NET_DSA_TAG_EDSA bool -config NET_DSA_TAG_TRAILER +config NET_DSA_TAG_KSZ bool -config NET_DSA_TAG_QCA +config NET_DSA_TAG_LAN9303 bool config NET_DSA_TAG_MTK bool -config NET_DSA_TAG_LAN9303 +config NET_DSA_TAG_TRAILER + bool + +config NET_DSA_TAG_QCA bool endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 0b747d75e65a..fcce25da937c 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,12 +1,13 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o +dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o -dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o -dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o -dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o +dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o +dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o +dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 90038d45a547..416ac4ef9ba9 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -24,7 +24,7 @@ #include <linux/phy_fixed.h> #include <linux/gpio/consumer.h> #include <linux/etherdevice.h> -#include <net/dsa.h> + #include "dsa_priv.h" static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, @@ -40,26 +40,29 @@ static const struct dsa_device_ops none_ops = { }; const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { +#ifdef CONFIG_NET_DSA_TAG_BRCM + [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, +#endif #ifdef CONFIG_NET_DSA_TAG_DSA [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops, #endif #ifdef CONFIG_NET_DSA_TAG_EDSA [DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops, #endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER - [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_BRCM - [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, +#ifdef CONFIG_NET_DSA_TAG_KSZ + [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops, #endif -#ifdef CONFIG_NET_DSA_TAG_QCA - [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, +#ifdef CONFIG_NET_DSA_TAG_LAN9303 + [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops, #endif #ifdef CONFIG_NET_DSA_TAG_MTK [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops, #endif -#ifdef CONFIG_NET_DSA_TAG_LAN9303 - [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops, +#ifdef CONFIG_NET_DSA_TAG_QCA + [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, +#endif +#ifdef CONFIG_NET_DSA_TAG_TRAILER + [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops, #endif [DSA_TAG_PROTO_NONE] = &none_ops, }; @@ -109,23 +112,22 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) return ops; } -int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds) +int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) { + struct dsa_switch *ds = cpu_dp->ds; struct net_device *master; 
struct ethtool_ops *cpu_ops; - master = ds->dst->master_netdev; - if (ds->master_netdev) - master = ds->master_netdev; + master = cpu_dp->netdev; cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL); if (!cpu_ops) return -ENOMEM; - memcpy(&ds->dst->master_ethtool_ops, master->ethtool_ops, + memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops, sizeof(struct ethtool_ops)); - ds->dst->master_orig_ethtool_ops = master->ethtool_ops; - memcpy(cpu_ops, &ds->dst->master_ethtool_ops, + cpu_dp->orig_ethtool_ops = master->ethtool_ops; + memcpy(cpu_ops, &cpu_dp->ethtool_ops, sizeof(struct ethtool_ops)); dsa_cpu_port_ethtool_init(cpu_ops); master->ethtool_ops = cpu_ops; @@ -133,15 +135,9 @@ int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds) return 0; } -void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) +void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp) { - struct net_device *master; - - master = ds->dst->master_netdev; - if (ds->master_netdev) - master = ds->master_netdev; - - master->ethtool_ops = ds->dst->master_orig_ethtool_ops; + cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops; } void dsa_cpu_dsa_destroy(struct dsa_port *port) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 7796580e99ee..52af8401af07 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -18,7 +18,7 @@ #include <linux/rtnetlink.h> #include <linux/of.h> #include <linux/of_net.h> -#include <net/dsa.h> + #include "dsa_priv.h" static LIST_HEAD(dsa_switch_trees); @@ -214,66 +214,59 @@ static int dsa_dst_complete(struct dsa_switch_tree *dst) return 0; } -static int dsa_dsa_port_apply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static int dsa_dsa_port_apply(struct dsa_port *port) { + struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(ds, ds->dev, port, index); + err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index); if (err) { dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", - index, err); + port->index, err); return err; } - memset(&ds->ports[index].devlink_port, 0, - sizeof(ds->ports[index].devlink_port)); + memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - return devlink_port_register(ds->devlink, - &ds->ports[index].devlink_port, - index); + return devlink_port_register(ds->devlink, &port->devlink_port, + port->index); } -static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static void dsa_dsa_port_unapply(struct dsa_port *port) { - devlink_port_unregister(&ds->ports[index].devlink_port); + devlink_port_unregister(&port->devlink_port); dsa_cpu_dsa_destroy(port); } -static int dsa_cpu_port_apply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static int dsa_cpu_port_apply(struct dsa_port *port) { + struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(ds, ds->dev, port, index); + err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index); if (err) { dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", - index, err); + port->index, err); return err; } - ds->cpu_port_mask |= BIT(index); - - memset(&ds->ports[index].devlink_port, 0, - sizeof(ds->ports[index].devlink_port)); - err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, - index); + memset(&port->devlink_port, 0, sizeof(port->devlink_port)); + err = devlink_port_register(ds->devlink, &port->devlink_port, + port->index); return err; } -static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static void dsa_cpu_port_unapply(struct dsa_port *port) { - 
devlink_port_unregister(&ds->ports[index].devlink_port); + devlink_port_unregister(&port->devlink_port); dsa_cpu_dsa_destroy(port); - ds->cpu_port_mask &= ~BIT(index); + port->ds->cpu_port_mask &= ~BIT(port->index); } -static int dsa_user_port_apply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static int dsa_user_port_apply(struct dsa_port *port) { + struct dsa_switch *ds = port->ds; const char *name = port->name; int err; @@ -282,35 +275,32 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index, if (!name) name = "eth%d"; - err = dsa_slave_create(ds, ds->dev, index, name); + err = dsa_slave_create(ds, ds->dev, port->index, name); if (err) { dev_warn(ds->dev, "Failed to create slave %d: %d\n", - index, err); - ds->ports[index].netdev = NULL; + port->index, err); + port->netdev = NULL; return err; } - memset(&ds->ports[index].devlink_port, 0, - sizeof(ds->ports[index].devlink_port)); - err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, - index); + memset(&port->devlink_port, 0, sizeof(port->devlink_port)); + err = devlink_port_register(ds->devlink, &port->devlink_port, + port->index); if (err) return err; - devlink_port_type_eth_set(&ds->ports[index].devlink_port, - ds->ports[index].netdev); + devlink_port_type_eth_set(&port->devlink_port, port->netdev); return 0; } -static void dsa_user_port_unapply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static void dsa_user_port_unapply(struct dsa_port *port) { - devlink_port_unregister(&ds->ports[index].devlink_port); - if (ds->ports[index].netdev) { - dsa_slave_destroy(ds->ports[index].netdev); - ds->ports[index].netdev = NULL; - ds->enabled_port_mask &= ~(1 << index); + devlink_port_unregister(&port->devlink_port); + if (port->netdev) { + dsa_slave_destroy(port->netdev); + port->netdev = NULL; + port->ds->enabled_port_mask &= ~(1 << port->index); } } @@ -347,7 +337,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) return err; if (ds->ops->set_addr) { - err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr); if (err < 0) return err; } @@ -370,20 +360,20 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) continue; if (dsa_port_is_dsa(port)) { - err = dsa_dsa_port_apply(port, index, ds); + err = dsa_dsa_port_apply(port); if (err) return err; continue; } if (dsa_port_is_cpu(port)) { - err = dsa_cpu_port_apply(port, index, ds); + err = dsa_cpu_port_apply(port); if (err) return err; continue; } - err = dsa_user_port_apply(port, index, ds); + err = dsa_user_port_apply(port); if (err) continue; } @@ -402,16 +392,16 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) continue; if (dsa_port_is_dsa(port)) { - dsa_dsa_port_unapply(port, index, ds); + dsa_dsa_port_unapply(port); continue; } if (dsa_port_is_cpu(port)) { - dsa_cpu_port_unapply(port, index, ds); + dsa_cpu_port_unapply(port); continue; } - dsa_user_port_unapply(port, index, ds); + dsa_user_port_unapply(port); } if (ds->slave_mii_bus && ds->ops->phy_read) @@ -443,8 +433,8 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - if (dst->cpu_switch) { - err = dsa_cpu_port_ethtool_setup(dst->cpu_switch); + if (dst->cpu_dp) { + err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); if (err) return err; } @@ -454,7 +444,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) * sent to the tag format's receive function. 
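The three *_port_apply() variants above now share one registration shape. Condensed into a single hypothetical helper (the name is illustrative; the diff keeps the code inline):

#include <linux/string.h>
#include <net/devlink.h>

#include "dsa_priv.h"

/* Condensation of the devlink registration repeated in
 * dsa_{dsa,cpu,user}_port_apply(): a dsa_port carries its own switch
 * back-pointer and index, so no (ds, index) pair needs threading.
 */
static int dsa_port_devlink_setup(struct dsa_port *port)
{
	memset(&port->devlink_port, 0, sizeof(port->devlink_port));

	return devlink_port_register(port->ds->devlink, &port->devlink_port,
				     port->index);
}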
*/ wmb(); - dst->master_netdev->dsa_ptr = (void *)dst; + dst->cpu_dp->netdev->dsa_ptr = dst; dst->applied = true; return 0; @@ -468,7 +458,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) if (!dst->applied) return; - dst->master_netdev->dsa_ptr = NULL; + dst->cpu_dp->netdev->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype * field, make sure that all packets from this point get sent @@ -484,9 +474,9 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - if (dst->cpu_switch) { - dsa_cpu_port_ethtool_restore(dst->cpu_switch); - dst->cpu_switch = NULL; + if (dst->cpu_dp) { + dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dst->cpu_dp = NULL; } pr_info("DSA: tree %d unapplied\n", dst->tree); @@ -500,6 +490,8 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, enum dsa_tag_protocol tag_protocol; struct net_device *ethernet_dev; struct device_node *ethernet; + struct dsa_port *p; + unsigned int i; if (port->dn) { ethernet = of_parse_phandle(port->dn, "ethernet", 0); @@ -514,15 +506,18 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, if (!ethernet_dev) return -EPROBE_DEFER; - if (!ds->master_netdev) - ds->master_netdev = ethernet_dev; + if (!dst->cpu_dp) { + dst->cpu_dp = port; + dst->cpu_dp->netdev = ethernet_dev; - if (!dst->master_netdev) - dst->master_netdev = ethernet_dev; + for (i = 0; i < ds->num_ports; i++) { + p = &ds->ports[i]; + if (!dsa_port_is_valid(p) || + i == index) + continue; - if (!dst->cpu_switch) { - dst->cpu_switch = ds; - dst->cpu_port = index; + p->cpu_dp = port; + } } tag_protocol = ds->ops->get_tag_protocol(ds); @@ -534,6 +529,12 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, dst->rcv = dst->tag_ops->rcv; + /* Initialize cpu_port_mask now for drv->setup() + * to have access to a correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. + */ + ds->cpu_port_mask |= BIT(index); + return 0; } @@ -545,14 +546,22 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) for (index = 0; index < ds->num_ports; index++) { port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) + if (!dsa_port_is_valid(port) || + dsa_port_is_dsa(port)) continue; if (dsa_port_is_cpu(port)) { err = dsa_cpu_parse(port, index, dst, ds); if (err) return err; + } else { + /* Initialize enabled_port_mask now for drv->setup() + * to have access to a correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. + */ + ds->enabled_port_mask |= BIT(index); } + } pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index); @@ -576,7 +585,7 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) return err; } - if (!dst->master_netdev) { + if (!dst->cpu_dp->netdev) { pr_warn("Tree has no master device\n"); return -EINVAL; } @@ -601,13 +610,6 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) return -EINVAL; ds->ports[reg].dn = port; - - /* Initialize enabled_port_mask now for ops->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. - */ - if (!dsa_port_is_cpu(&ds->ports[reg])) - ds->enabled_port_mask |= 1 << reg; } return 0; @@ -623,14 +625,6 @@ static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) continue; ds->ports[i].name = cd->port_names[i]; - - /* Initialize enabled_port_mask now for drv->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. 
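The parse step now precomputes the port masks that drivers' setup() callbacks rely on. Summarized as a hypothetical helper built from the mask assignments visible in these hunks:

#include <linux/bitops.h>

#include "dsa_priv.h"

/* Hypothetical summary of the mask bookkeeping done while parsing:
 * CPU ports accumulate in cpu_port_mask, remaining valid non-DSA
 * ports in enabled_port_mask, both before drv->setup() runs.
 */
static void dsa_ds_init_masks(struct dsa_switch *ds)
{
	unsigned int index;

	for (index = 0; index < ds->num_ports; index++) {
		struct dsa_port *port = &ds->ports[index];

		if (!dsa_port_is_valid(port) || dsa_port_is_dsa(port))
			continue;

		if (dsa_port_is_cpu(port))
			ds->cpu_port_mask |= BIT(index);
		else
			ds->enabled_port_mask |= BIT(index);
	}
}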
- */ - if (!dsa_port_is_cpu(&ds->ports[i])) - ds->enabled_port_mask |= 1 << i; - valid_name_found = true; } @@ -690,10 +684,10 @@ static struct device_node *dsa_get_ports(struct dsa_switch *ds, return ports; } -static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) +static int _dsa_register_switch(struct dsa_switch *ds) { - struct dsa_chip_data *pdata = dev->platform_data; - struct device_node *np = dev->of_node; + struct dsa_chip_data *pdata = ds->dev->platform_data; + struct device_node *np = ds->dev->of_node; struct dsa_switch_tree *dst; struct device_node *ports; u32 tree, index; @@ -807,12 +801,12 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) } EXPORT_SYMBOL_GPL(dsa_switch_alloc); -int dsa_register_switch(struct dsa_switch *ds, struct device *dev) +int dsa_register_switch(struct dsa_switch *ds) { int err; mutex_lock(&dsa2_mutex); - err = _dsa_register_switch(ds, dev); + err = _dsa_register_switch(ds); mutex_unlock(&dsa2_mutex); return err; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f4a88e485213..55982cc39b24 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -14,6 +14,56 @@ #include <linux/phy.h> #include <linux/netdevice.h> #include <linux/netpoll.h> +#include <net/dsa.h> + +enum { + DSA_NOTIFIER_AGEING_TIME, + DSA_NOTIFIER_BRIDGE_JOIN, + DSA_NOTIFIER_BRIDGE_LEAVE, + DSA_NOTIFIER_FDB_ADD, + DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_MDB_ADD, + DSA_NOTIFIER_MDB_DEL, + DSA_NOTIFIER_VLAN_ADD, + DSA_NOTIFIER_VLAN_DEL, +}; + +/* DSA_NOTIFIER_AGEING_TIME */ +struct dsa_notifier_ageing_time_info { + struct switchdev_trans *trans; + unsigned int ageing_time; +}; + +/* DSA_NOTIFIER_BRIDGE_* */ +struct dsa_notifier_bridge_info { + struct net_device *br; + int sw_index; + int port; +}; + +/* DSA_NOTIFIER_FDB_* */ +struct dsa_notifier_fdb_info { + const struct switchdev_obj_port_fdb *fdb; + struct switchdev_trans *trans; + int sw_index; + int port; +}; + +/* DSA_NOTIFIER_MDB_* */ +struct dsa_notifier_mdb_info { + const struct switchdev_obj_port_mdb *mdb; + struct switchdev_trans *trans; + int sw_index; + int port; +}; + +/* DSA_NOTIFIER_VLAN_* */ +struct dsa_notifier_vlan_info { + const struct switchdev_obj_port_vlan *vlan; + struct switchdev_trans *trans; + int sw_index; + int port; +}; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); @@ -23,6 +73,7 @@ struct dsa_device_ops { }; struct dsa_slave_priv { + /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); @@ -52,13 +103,46 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, struct dsa_port *dport, int port); void dsa_cpu_dsa_destroy(struct dsa_port *dport); const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); -int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds); -void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds); +int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp); +void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp); /* legacy.c */ int dsa_legacy_register(void); void dsa_legacy_unregister(void); +/* port.c */ +int dsa_port_set_state(struct dsa_port *dp, u8 state, + struct switchdev_trans *trans); +void dsa_port_set_state_now(struct dsa_port *dp, u8 state); +int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); +void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, + struct switchdev_trans 
*trans); +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, + struct switchdev_trans *trans); +int dsa_port_fdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans); +int dsa_port_fdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb); +int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb); +int dsa_port_mdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); +int dsa_port_mdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb, + switchdev_obj_dump_cb_t *cb); +int dsa_port_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans); +int dsa_port_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan); +int dsa_port_vlan_dump(struct dsa_port *dp, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb); + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); @@ -75,25 +159,38 @@ void dsa_slave_unregister_notifier(void); int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); +/* tag_brcm.c */ +extern const struct dsa_device_ops brcm_netdev_ops; + /* tag_dsa.c */ extern const struct dsa_device_ops dsa_netdev_ops; /* tag_edsa.c */ extern const struct dsa_device_ops edsa_netdev_ops; -/* tag_trailer.c */ -extern const struct dsa_device_ops trailer_netdev_ops; +/* tag_ksz.c */ +extern const struct dsa_device_ops ksz_netdev_ops; -/* tag_brcm.c */ -extern const struct dsa_device_ops brcm_netdev_ops; +/* tag_lan9303.c */ +extern const struct dsa_device_ops lan9303_netdev_ops; + +/* tag_mtk.c */ +extern const struct dsa_device_ops mtk_netdev_ops; /* tag_qca.c */ extern const struct dsa_device_ops qca_netdev_ops; -/* tag_mtk.c */ -extern const struct dsa_device_ops mtk_netdev_ops; +/* tag_trailer.c */ +extern const struct dsa_device_ops trailer_netdev_ops; -/* tag_lan9303.c */ -extern const struct dsa_device_ops lan9303_netdev_ops; +static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p) +{ + return p->dp->cpu_dp->netdev; +} + +static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst) +{ + return dst->cpu_dp; +} #endif diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 7281098df04e..e60906125375 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -22,7 +22,7 @@ #include <linux/sysfs.h> #include <linux/phy_fixed.h> #include <linux/etherdevice.h> -#include <net/dsa.h> + #include "dsa_priv.h" /* switch driver registration ***********************************************/ @@ -101,9 +101,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) struct dsa_switch_tree *dst = ds->dst; struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; + struct net_device *master; int index = ds->index; int i, ret; + master = dst->cpu_dp->netdev; + /* * Validate supplied switch configuration. 
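The accessors added at the end of dsa_priv.h shorten the transmit hot path. A hypothetical slave-xmit fragment using one of them (simplified; the real path also runs the tagger's xmit hook first):

#include <linux/netdevice.h>

#include "dsa_priv.h"

/* Simplified transmit tail: queue the already-tagged skb on the CPU
 * port's master device instead of digging through ds->dst for the
 * master netdev.
 */
static netdev_tx_t dsa_slave_queue_tagged(struct sk_buff *skb,
					  struct dsa_slave_priv *p)
{
	skb->dev = dsa_master_netdev(p);
	dev_queue_xmit(skb);

	return NETDEV_TX_OK;
}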
*/ @@ -115,18 +118,18 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) continue; if (!strcmp(name, "cpu")) { - if (dst->cpu_switch) { - netdev_err(dst->master_netdev, + if (dst->cpu_dp) { + netdev_err(master, "multiple cpu ports?!\n"); return -EINVAL; } - dst->cpu_switch = ds; - dst->cpu_port = i; + dst->cpu_dp = &ds->ports[i]; ds->cpu_port_mask |= 1 << i; } else if (!strcmp(name, "dsa")) { ds->dsa_port_mask |= 1 << i; } else { ds->enabled_port_mask |= 1 << i; + ds->ports[i].cpu_dp = dst->cpu_dp; } valid_name_found = true; } @@ -144,7 +147,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * tagging protocol to the preferred tagging format of this * switch. */ - if (dst->cpu_switch == ds) { + if (dst->cpu_dp->ds == ds) { enum dsa_tag_protocol tag_protocol; tag_protocol = ops->get_tag_protocol(ds); @@ -169,7 +172,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) return ret; if (ops->set_addr) { - ret = ops->set_addr(ds, dst->master_netdev->dev_addr); + ret = ops->set_addr(ds, master->dev_addr); if (ret < 0) return ret; } @@ -196,17 +199,17 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); if (ret < 0) - netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", + netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", index, i, cd->port_names[i], ret); } /* Perform configuration of the CPU and DSA ports */ ret = dsa_cpu_dsa_setups(ds, parent); if (ret < 0) - netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n", + netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", index); - ret = dsa_cpu_port_ethtool_setup(ds); + ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp); if (ret) return ret; @@ -218,6 +221,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct device *parent, struct device *host_dev) { struct dsa_chip_data *cd = dst->pd->chip + index; + struct net_device *master = dst->cpu_dp->netdev; const struct dsa_switch_ops *ops; struct dsa_switch *ds; int ret; @@ -229,11 +233,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, */ ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); if (!ops) { - netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", + netdev_err(master, "[%d]: could not detect attached switch\n", index); return ERR_PTR(-EINVAL); } - netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", + netdev_info(master, "[%d]: detected a %s switch\n", index, name); @@ -576,8 +580,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, unsigned configured = 0; dst->pd = pd; - dst->master_netdev = dev; - dst->cpu_port = -1; + dst->cpu_dp->netdev = dev; for (i = 0; i < pd->nr_chips; i++) { struct dsa_switch *ds; @@ -606,7 +609,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, * sent to the tag format's receive function. 
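The wmb() ahead of the dsa_ptr store below is a publish barrier. Spelled out as a hypothetical wrapper (the diff keeps it open-coded):

#include <linux/netdevice.h>

#include "dsa_priv.h"

/* Hypothetical wrapper making the ordering explicit: every write that
 * initialized 'dst' must be visible to other CPUs before the receive
 * hot path can observe a non-NULL dev->dsa_ptr and call dst->rcv().
 */
static void dsa_publish_tree(struct net_device *dev,
			     struct dsa_switch_tree *dst)
{
	wmb();
	dev->dsa_ptr = dst;
}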
*/ wmb(); - dev->dsa_ptr = (void *)dst; + dev->dsa_ptr = dst; return 0; } @@ -673,7 +676,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - dst->master_netdev->dsa_ptr = NULL; + dst->cpu_dp->netdev->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype * field, make sure that all packets from this point get sent @@ -688,9 +691,9 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) dsa_switch_destroy(ds); } - dsa_cpu_port_ethtool_restore(dst->cpu_switch); + dsa_cpu_port_ethtool_restore(dst->cpu_dp); - dev_put(dst->master_netdev); + dev_put(dst->cpu_dp->netdev); } static int dsa_remove(struct platform_device *pdev) diff --git a/net/dsa/port.c b/net/dsa/port.c new file mode 100644 index 000000000000..efc3bce3a89d --- /dev/null +++ b/net/dsa/port.c @@ -0,0 +1,259 @@ +/* + * Handling of a single switch port + * + * Copyright (c) 2017 Savoir-faire Linux Inc. + * Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/if_bridge.h> +#include <linux/notifier.h> + +#include "dsa_priv.h" + +static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v) +{ + struct raw_notifier_head *nh = &dp->ds->dst->nh; + int err; + + err = raw_notifier_call_chain(nh, e, v); + + return notifier_to_errno(err); +} + +int dsa_port_set_state(struct dsa_port *dp, u8 state, + struct switchdev_trans *trans) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (switchdev_trans_ph_prepare(trans)) + return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; + + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, port, state); + + if (ds->ops->port_fast_age) { + /* Fast age FDB entries or flush appropriate forwarding database + * for the given port, if we are moving it from Learning or + * Forwarding state, to Disabled or Blocking or Listening state. + */ + + if ((dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING) && + (state == BR_STATE_DISABLED || + state == BR_STATE_BLOCKING || + state == BR_STATE_LISTENING)) + ds->ops->port_fast_age(ds, port); + } + + dp->stp_state = state; + + return 0; +} + +void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +{ + int err; + + err = dsa_port_set_state(dp, state, NULL); + if (err) + pr_err("DSA: failed to set STP state %u (%d)\n", state, err); +} + +int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) +{ + struct dsa_notifier_bridge_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .br = br, + }; + int err; + + /* Here the port is already bridged. Reflect the current configuration + * so that drivers can program their chips accordingly. + */ + dp->bridge_dev = br; + + err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info); + + /* The bridging is rolled back on error */ + if (err) + dp->bridge_dev = NULL; + + return err; +} + +void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) +{ + struct dsa_notifier_bridge_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .br = br, + }; + int err; + + /* Here the port is already unbridged. Reflect the current configuration + * so that drivers can program their chips accordingly. 
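+ * (e.g. by removing the port from the bridge group or port-based VLAN + * map it was programmed into).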
+ */ + dp->bridge_dev = NULL; + + err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info); + if (err) + pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); + + /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, + * so allow it to be in BR_STATE_FORWARDING to be kept functional + */ + dsa_port_set_state_now(dp, BR_STATE_FORWARDING); +} + +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, + struct switchdev_trans *trans) +{ + struct dsa_switch *ds = dp->ds; + + /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ + if (switchdev_trans_ph_prepare(trans)) + return 0; + + if (ds->ops->port_vlan_filtering) + return ds->ops->port_vlan_filtering(ds, dp->index, + vlan_filtering); + + return 0; +} + +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, + struct switchdev_trans *trans) +{ + unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock); + unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); + struct dsa_notifier_ageing_time_info info = { + .ageing_time = ageing_time, + .trans = trans, + }; + + if (switchdev_trans_ph_prepare(trans)) + return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + + dp->ageing_time = ageing_time; + + return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); +} + +int dsa_port_fdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct dsa_notifier_fdb_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .trans = trans, + .fdb = fdb, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info); +} + +int dsa_port_fdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb) +{ + struct dsa_notifier_fdb_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .fdb = fdb, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); +} + +int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_switch *ds = dp->ds; + + if (ds->ops->port_fdb_dump) + return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb); + + return -EOPNOTSUPP; +} + +int dsa_port_mdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct dsa_notifier_mdb_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .trans = trans, + .mdb = mdb, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); +} + +int dsa_port_mdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_notifier_mdb_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .mdb = mdb, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info); +} + +int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_switch *ds = dp->ds; + + if (ds->ops->port_mdb_dump) + return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb); + + return -EOPNOTSUPP; +} + +int dsa_port_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) +{ + struct dsa_notifier_vlan_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .trans = trans, + .vlan = vlan, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); +} + +int dsa_port_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan) +{ + struct dsa_notifier_vlan_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .vlan = vlan, + }; + + return 
dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); +} + +int dsa_port_vlan_dump(struct dsa_port *dp, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_switch *ds = dp->ds; + + if (ds->ops->port_vlan_dump) + return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb); + + return -EOPNOTSUPP; +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7693182df81e..9507bd38cf04 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -17,28 +17,16 @@ #include <linux/of_mdio.h> #include <linux/mdio.h> #include <linux/list.h> -#include <net/dsa.h> #include <net/rtnetlink.h> -#include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> #include <linux/netpoll.h> + #include "dsa_priv.h" static bool dsa_slave_dev_check(struct net_device *dev); -static int dsa_slave_notify(struct net_device *dev, unsigned long e, void *v) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct raw_notifier_head *nh = &p->dp->ds->dst->nh; - int err; - - err = raw_notifier_call_chain(nh, e, v); - - return notifier_to_errno(err); -} - /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -78,48 +66,16 @@ static int dsa_slave_get_iflink(const struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - return p->dp->ds->dst->master_netdev->ifindex; -} - -static inline bool dsa_port_is_bridged(struct dsa_port *dp) -{ - return !!dp->bridge_dev; + return dsa_master_netdev(p)->ifindex; } -static void dsa_slave_set_state(struct net_device *dev, u8 state) +static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_port *dp = p->dp; struct dsa_switch *ds = dp->ds; - int port = dp->index; - - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, port, state); - - if (ds->ops->port_fast_age) { - /* Fast age FDB entries or flush appropriate forwarding database - * for the given port, if we are moving it from Learning or - * Forwarding state, to Disabled or Blocking or Listening state. - */ - - if ((dp->stp_state == BR_STATE_LEARNING || - dp->stp_state == BR_STATE_FORWARDING) && - (state == BR_STATE_DISABLED || - state == BR_STATE_BLOCKING || - state == BR_STATE_LISTENING)) - ds->ops->port_fast_age(ds, port); - } - - dp->stp_state = state; -} - -static int dsa_slave_open(struct net_device *dev) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; - struct dsa_switch *ds = p->dp->ds; - u8 stp_state = dsa_port_is_bridged(p->dp) ? - BR_STATE_BLOCKING : BR_STATE_FORWARDING; + struct net_device *master = dsa_master_netdev(p); + u8 stp_state = dp->bridge_dev ? 
BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; if (!(master->flags & IFF_UP)) @@ -148,7 +104,7 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } - dsa_slave_set_state(dev, stp_state); + dsa_port_set_state_now(p->dp, stp_state); if (p->phy) phy_start(p->phy); @@ -171,7 +127,7 @@ out: static int dsa_slave_close(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; + struct net_device *master = dsa_master_netdev(p); struct dsa_switch *ds = p->dp->ds; if (p->phy) @@ -190,7 +146,7 @@ static int dsa_slave_close(struct net_device *dev) if (ds->ops->port_disable) ds->ops->port_disable(ds, p->dp->index, p->phy); - dsa_slave_set_state(dev, BR_STATE_DISABLED); + dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); return 0; } @@ -198,7 +154,7 @@ static int dsa_slave_close(struct net_device *dev) static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; + struct net_device *master = dsa_master_netdev(p); if (change & IFF_ALLMULTI) dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); @@ -209,7 +165,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) static void dsa_slave_set_rx_mode(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; + struct net_device *master = dsa_master_netdev(p); dev_mc_sync(master, dev); dev_uc_sync(master, dev); @@ -218,7 +174,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; + struct net_device *master = dsa_master_netdev(p); struct sockaddr *addr = a; int err; @@ -243,140 +199,6 @@ out: return 0; } -static int dsa_slave_port_vlan_add(struct net_device *dev, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; - struct dsa_switch *ds = dp->ds; - - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) - return -EOPNOTSUPP; - - return ds->ops->port_vlan_prepare(ds, dp->index, vlan, trans); - } - - ds->ops->port_vlan_add(ds, dp->index, vlan, trans); - - return 0; -} - -static int dsa_slave_port_vlan_del(struct net_device *dev, - const struct switchdev_obj_port_vlan *vlan) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (!ds->ops->port_vlan_del) - return -EOPNOTSUPP; - - return ds->ops->port_vlan_del(ds, p->dp->index, vlan); -} - -static int dsa_slave_port_vlan_dump(struct net_device *dev, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->ops->port_vlan_dump) - return ds->ops->port_vlan_dump(ds, p->dp->index, vlan, cb); - - return -EOPNOTSUPP; -} - -static int dsa_slave_port_fdb_add(struct net_device *dev, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) - return -EOPNOTSUPP; - - return ds->ops->port_fdb_prepare(ds, 
p->dp->index, fdb, trans); - } - - ds->ops->port_fdb_add(ds, p->dp->index, fdb, trans); - - return 0; -} - -static int dsa_slave_port_fdb_del(struct net_device *dev, - const struct switchdev_obj_port_fdb *fdb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - int ret = -EOPNOTSUPP; - - if (ds->ops->port_fdb_del) - ret = ds->ops->port_fdb_del(ds, p->dp->index, fdb); - - return ret; -} - -static int dsa_slave_port_fdb_dump(struct net_device *dev, - struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->ops->port_fdb_dump) - return ds->ops->port_fdb_dump(ds, p->dp->index, fdb, cb); - - return -EOPNOTSUPP; -} - -static int dsa_slave_port_mdb_add(struct net_device *dev, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) - return -EOPNOTSUPP; - - return ds->ops->port_mdb_prepare(ds, p->dp->index, mdb, trans); - } - - ds->ops->port_mdb_add(ds, p->dp->index, mdb, trans); - - return 0; -} - -static int dsa_slave_port_mdb_del(struct net_device *dev, - const struct switchdev_obj_port_mdb *mdb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->ops->port_mdb_del) - return ds->ops->port_mdb_del(ds, p->dp->index, mdb); - - return -EOPNOTSUPP; -} - -static int dsa_slave_port_mdb_dump(struct net_device *dev, - struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->ops->port_mdb_dump) - return ds->ops->port_mdb_dump(ds, p->dp->index, mdb, cb); - - return -EOPNOTSUPP; -} - static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -387,96 +209,24 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } -static int dsa_slave_stp_state_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (switchdev_trans_ph_prepare(trans)) - return ds->ops->port_stp_state_set ? 
0 : -EOPNOTSUPP; - - dsa_slave_set_state(dev, attr->u.stp_state); - - return 0; -} - -static int dsa_slave_vlan_filtering(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ - if (switchdev_trans_ph_prepare(trans)) - return 0; - - if (ds->ops->port_vlan_filtering) - return ds->ops->port_vlan_filtering(ds, p->dp->index, - attr->u.vlan_filtering); - - return 0; -} - -static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds, - unsigned int ageing_time) -{ - int i; - - for (i = 0; i < ds->num_ports; ++i) { - struct dsa_port *dp = &ds->ports[i]; - - if (dp && dp->ageing_time && dp->ageing_time < ageing_time) - ageing_time = dp->ageing_time; - } - - return ageing_time; -} - -static int dsa_slave_ageing_time(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time); - unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); - - if (switchdev_trans_ph_prepare(trans)) { - if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) - return -ERANGE; - if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) - return -ERANGE; - return 0; - } - - /* Keep the fastest ageing time in case of multiple bridges */ - p->dp->ageing_time = ageing_time; - ageing_time = dsa_fastest_ageing_time(ds, ageing_time); - - if (ds->ops->set_ageing_time) - return ds->ops->set_ageing_time(ds, ageing_time); - - return 0; -} - static int dsa_slave_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int ret; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - ret = dsa_slave_stp_state_set(dev, attr, trans); + ret = dsa_port_set_state(dp, attr->u.stp_state, trans); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - ret = dsa_slave_vlan_filtering(dev, attr, trans); + ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, + trans); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - ret = dsa_slave_ageing_time(dev, attr, trans); + ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans); break; default: ret = -EOPNOTSUPP; @@ -490,6 +240,8 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int err; /* For the prepare phase, ensure the full set of changes is feasible in @@ -499,18 +251,14 @@ static int dsa_slave_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_slave_port_fdb_add(dev, - SWITCHDEV_OBJ_PORT_FDB(obj), - trans); + err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans); break; case SWITCHDEV_OBJ_ID_PORT_MDB: - err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj), - trans); + err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_port_vlan_add(dev, - SWITCHDEV_OBJ_PORT_VLAN(obj), - trans); + err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), + trans); break; default: err = -EOPNOTSUPP; @@ -523,19 +271,19 @@ static int
dsa_slave_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int err; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_slave_port_fdb_del(dev, - SWITCHDEV_OBJ_PORT_FDB(obj)); + err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_MDB: - err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj)); + err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_port_vlan_del(dev, - SWITCHDEV_OBJ_PORT_VLAN(obj)); + err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; default: err = -EOPNOTSUPP; @@ -549,22 +297,19 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, switchdev_obj_dump_cb_t *cb) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int err; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_slave_port_fdb_dump(dev, - SWITCHDEV_OBJ_PORT_FDB(obj), - cb); + err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb); break; case SWITCHDEV_OBJ_ID_PORT_MDB: - err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj), - cb); + err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_port_vlan_dump(dev, - SWITCHDEV_OBJ_PORT_VLAN(obj), - cb); + err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb); break; default: err = -EOPNOTSUPP; @@ -574,57 +319,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, return err; } -static int dsa_slave_bridge_port_join(struct net_device *dev, - struct net_device *br) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_notifier_bridge_info info = { - .sw_index = p->dp->ds->index, - .port = p->dp->index, - .br = br, - }; - int err; - - /* Here the port is already bridged. Reflect the current configuration - * so that drivers can program their chips accordingly. - */ - p->dp->bridge_dev = br; - - err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_JOIN, &info); - - /* The bridging is rolled back on error */ - if (err) - p->dp->bridge_dev = NULL; - - return err; -} - -static void dsa_slave_bridge_port_leave(struct net_device *dev, - struct net_device *br) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_notifier_bridge_info info = { - .sw_index = p->dp->ds->index, - .port = p->dp->index, - .br = br, - }; - int err; - - /* Here the port is already unbridged. Reflect the current configuration - * so that drivers can program their chips accordingly. - */ - p->dp->bridge_dev = NULL; - - err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_LEAVE, &info); - if (err) - netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); - - /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, - * so allow it to be in BR_STATE_FORWARDING to be kept functional - */ - dsa_slave_set_state(dev, BR_STATE_FORWARDING); -} - static int dsa_slave_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { @@ -663,10 +357,14 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - /* Transmit function may have to reallocate the original SKB */ + /* Transmit function may have to reallocate the original SKB, + * in which case it must have freed it. Only free it here on error. 
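+ * A NULL return means the tagger failed without freeing the original + * SKB (e.g. skb_cow_head() could not make headroom), so ownership + * stays with this function.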
+ */ nskb = p->xmit(skb, dev); - if (!nskb) + if (!nskb) { + kfree_skb(skb); return NETDEV_TX_OK; + } /* SKB for netpoll still needs to be mangled with the protocol-specific * tag to be successfully transmitted @@ -677,7 +375,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) /* Queue the SKB for transmission on the parent interface, but * do not modify its EtherType */ - nskb->dev = p->dp->ds->dst->master_netdev; + nskb->dev = dsa_master_netdev(p); dev_queue_xmit(nskb); return NETDEV_TX_OK; @@ -689,12 +387,13 @@ dsa_slave_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct dsa_slave_priv *p = netdev_priv(dev); - int err = -EOPNOTSUPP; - if (p->phy != NULL) - err = phy_ethtool_ksettings_get(p->phy, cmd); + if (!p->phy) + return -EOPNOTSUPP; - return err; + phy_ethtool_ksettings_get(p->phy, cmd); + + return 0; } static int @@ -821,14 +520,14 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, uint64_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->cpu_switch; - s8 cpu_port = dst->cpu_port; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; + s8 cpu_port = cpu_dp->index; int count = 0; - if (dst->master_ethtool_ops.get_sset_count) { - count = dst->master_ethtool_ops.get_sset_count(dev, - ETH_SS_STATS); - dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); + if (cpu_dp->ethtool_ops.get_sset_count) { + count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); + cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data); } if (ds->ops->get_ethtool_stats) @@ -838,11 +537,12 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->cpu_switch; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; int count = 0; - if (dst->master_ethtool_ops.get_sset_count) - count += dst->master_ethtool_ops.get_sset_count(dev, sset); + if (cpu_dp->ethtool_ops.get_sset_count) + count += cpu_dp->ethtool_ops.get_sset_count(dev, sset); if (sset == ETH_SS_STATS && ds->ops->get_sset_count) count += ds->ops->get_sset_count(ds); @@ -854,8 +554,9 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->cpu_switch; - s8 cpu_port = dst->cpu_port; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; + s8 cpu_port = cpu_dp->index; int len = ETH_GSTRING_LEN; int mcount = 0, count; unsigned int i; @@ -866,10 +567,9 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; - if (dst->master_ethtool_ops.get_sset_count) { - mcount = dst->master_ethtool_ops.get_sset_count(dev, - ETH_SS_STATS); - dst->master_ethtool_ops.get_strings(dev, stringset, data); + if (cpu_dp->ethtool_ops.get_sset_count) { + mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); + cpu_dp->ethtool_ops.get_strings(dev, stringset, data); } if (stringset == ETH_SS_STATS && ds->ops->get_strings) { @@ -985,8 +685,7 @@ static int dsa_slave_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - struct net_device *master = ds->dst->master_netdev; +
struct net_device *master = dsa_master_netdev(p); struct netpoll *netpoll; int err = 0; @@ -1138,10 +837,13 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, } static int dsa_slave_setup_tc(struct net_device *dev, u32 handle, - __be16 protocol, struct tc_to_netdev *tc) + u32 chain_index, __be16 protocol, + struct tc_to_netdev *tc) { bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); - int ret = -EOPNOTSUPP; + + if (chain_index) + return -EOPNOTSUPP; switch (tc->type) { case TC_SETUP_MATCHALL: @@ -1155,10 +857,8 @@ static int dsa_slave_setup_tc(struct net_device *dev, u32 handle, return 0; } default: - break; + return -EOPNOTSUPP; } - - return ret; } void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) @@ -1441,11 +1141,11 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, struct net_device *master; struct net_device *slave_dev; struct dsa_slave_priv *p; + struct dsa_port *cpu_dp; int ret; - master = ds->dst->master_netdev; - if (ds->master_netdev) - master = ds->master_netdev; + cpu_dp = ds->dst->cpu_dp; + master = cpu_dp->netdev; slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name, NET_NAME_UNKNOWN, ether_setup); @@ -1528,14 +1228,16 @@ static bool dsa_slave_dev_check(struct net_device *dev) static int dsa_slave_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int err = NOTIFY_DONE; if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) { - err = dsa_slave_bridge_port_join(dev, info->upper_dev); + err = dsa_port_bridge_join(dp, info->upper_dev); err = notifier_from_errno(err); } else { - dsa_slave_bridge_port_leave(dev, info->upper_dev); + dsa_port_bridge_leave(dp, info->upper_dev); err = NOTIFY_OK; } } diff --git a/net/dsa/switch.c b/net/dsa/switch.c index ca6e26e514f0..f1029a8d0e20 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -12,7 +12,47 @@ #include <linux/netdevice.h> #include <linux/notifier.h> -#include <net/dsa.h> +#include <net/switchdev.h> + +#include "dsa_priv.h" + +static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, + unsigned int ageing_time) +{ + int i; + + for (i = 0; i < ds->num_ports; ++i) { + struct dsa_port *dp = &ds->ports[i]; + + if (dp->ageing_time && dp->ageing_time < ageing_time) + ageing_time = dp->ageing_time; + } + + return ageing_time; +} + +static int dsa_switch_ageing_time(struct dsa_switch *ds, + struct dsa_notifier_ageing_time_info *info) +{ + unsigned int ageing_time = info->ageing_time; + struct switchdev_trans *trans = info->trans; + + if (switchdev_trans_ph_prepare(trans)) { + if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) + return -ERANGE; + if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) + return -ERANGE; + return 0; + } + + /* Program the fastest ageing time in case of multiple bridges */ + ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time); + + if (ds->ops->set_ageing_time) + return ds->ops->set_ageing_time(ds, ageing_time); + + return 0; +} static int dsa_switch_bridge_join(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) @@ -40,6 +80,127 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, return 0; } +static int dsa_switch_fdb_add(struct dsa_switch *ds, + struct dsa_notifier_fdb_info *info) +{ + const struct switchdev_obj_port_fdb *fdb = info->fdb; + struct switchdev_trans *trans = info->trans; + + /* Do not care yet about other switch chips of the fabric */ + if (ds->index != 
info->sw_index) + return 0; + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) + return -EOPNOTSUPP; + + return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans); + } + + ds->ops->port_fdb_add(ds, info->port, fdb, trans); + + return 0; +} + +static int dsa_switch_fdb_del(struct dsa_switch *ds, + struct dsa_notifier_fdb_info *info) +{ + const struct switchdev_obj_port_fdb *fdb = info->fdb; + + /* Do not care yet about other switch chips of the fabric */ + if (ds->index != info->sw_index) + return 0; + + if (!ds->ops->port_fdb_del) + return -EOPNOTSUPP; + + return ds->ops->port_fdb_del(ds, info->port, fdb); +} + +static int dsa_switch_mdb_add(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) +{ + const struct switchdev_obj_port_mdb *mdb = info->mdb; + struct switchdev_trans *trans = info->trans; + + /* Do not care yet about other switch chips of the fabric */ + if (ds->index != info->sw_index) + return 0; + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + return ds->ops->port_mdb_prepare(ds, info->port, mdb, trans); + } + + ds->ops->port_mdb_add(ds, info->port, mdb, trans); + + return 0; +} + +static int dsa_switch_mdb_del(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) +{ + const struct switchdev_obj_port_mdb *mdb = info->mdb; + + /* Do not care yet about other switch chips of the fabric */ + if (ds->index != info->sw_index) + return 0; + + if (!ds->ops->port_mdb_del) + return -EOPNOTSUPP; + + return ds->ops->port_mdb_del(ds, info->port, mdb); +} + +static int dsa_switch_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) +{ + const struct switchdev_obj_port_vlan *vlan = info->vlan; + struct switchdev_trans *trans = info->trans; + DECLARE_BITMAP(members, ds->num_ports); + int port, err; + + /* Build a mask of VLAN members */ + bitmap_zero(members, ds->num_ports); + if (ds->index == info->sw_index) + set_bit(info->port, members); + for (port = 0; port < ds->num_ports; port++) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + set_bit(port, members); + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) + return -EOPNOTSUPP; + + for_each_set_bit(port, members, ds->num_ports) { + err = ds->ops->port_vlan_prepare(ds, port, vlan, trans); + if (err) + return err; + } + + return 0; + } + + for_each_set_bit(port, members, ds->num_ports) + ds->ops->port_vlan_add(ds, port, vlan, trans); + + return 0; +} + +static int dsa_switch_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) +{ + const struct switchdev_obj_port_vlan *vlan = info->vlan; + + if (!ds->ops->port_vlan_del) + return -EOPNOTSUPP; + + if (ds->index == info->sw_index) + return ds->ops->port_vlan_del(ds, info->port, vlan); + + return 0; +} + static int dsa_switch_event(struct notifier_block *nb, unsigned long event, void *info) { @@ -47,12 +208,33 @@ static int dsa_switch_event(struct notifier_block *nb, int err; switch (event) { + case DSA_NOTIFIER_AGEING_TIME: + err = dsa_switch_ageing_time(ds, info); + break; case DSA_NOTIFIER_BRIDGE_JOIN: err = dsa_switch_bridge_join(ds, info); break; case DSA_NOTIFIER_BRIDGE_LEAVE: err = dsa_switch_bridge_leave(ds, info); break; + case DSA_NOTIFIER_FDB_ADD: + err = dsa_switch_fdb_add(ds, info); + break; + case DSA_NOTIFIER_FDB_DEL: + err = dsa_switch_fdb_del(ds, info); + break; + case DSA_NOTIFIER_MDB_ADD: + err = dsa_switch_mdb_add(ds, info); + break; +
case DSA_NOTIFIER_MDB_DEL: + err = dsa_switch_mdb_del(ds, info); + break; + case DSA_NOTIFIER_VLAN_ADD: + err = dsa_switch_vlan_add(ds, info); + break; + case DSA_NOTIFIER_VLAN_DEL: + err = dsa_switch_vlan_del(ds, info); + break; default: err = -EOPNOTSUPP; break; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 2a9b52c5af86..c697d9815177 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -12,7 +12,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" /* This tag length is 4 bytes, older ones were 6 bytes, we do not @@ -65,7 +65,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev u8 *brcm_tag; if (skb_cow_head(skb, BRCM_TAG_LEN) < 0) - goto out_free; + return NULL; skb_push(skb, BRCM_TAG_LEN); @@ -86,10 +86,6 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK; return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, @@ -97,34 +93,33 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; int source_port; u8 *brcm_tag; - ds = dst->cpu_switch; - if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) - goto out_drop; + return NULL; /* skb->data points to the EtherType, the tag is right before it */ brcm_tag = skb->data - 2; /* The opcode should never be different than 0b000 */ if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK)) - goto out_drop; + return NULL; /* We should never see a reserved reason code without knowing how to * handle it */ if (unlikely(brcm_tag[2] & BRCM_EG_RC_RSVD)) - goto out_drop; + return NULL; /* Locate which port this is coming from */ source_port = brcm_tag[3] & BRCM_EG_PID_MASK; /* Validate port against switch setup, either the port is totally */ if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) - goto out_drop; + return NULL; /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); @@ -137,9 +132,6 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops brcm_netdev_ops = { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 1c6633f0de01..12867a4b458f 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -11,7 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" #define DSA_HLEN 4 @@ -28,7 +28,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) */ if (skb->protocol == htons(ETH_P_8021Q)) { if (skb_cow_head(skb, 0) < 0) - goto out_free; + return NULL; /* * Construct tagged FROM_CPU DSA tag from 802.1q tag. 
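 * (the 802.1q EtherType bytes are overwritten in place with the DSA * mode, device and port fields, while the VLAN TCI bytes are reused).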
@@ -46,7 +46,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) } } else { if (skb_cow_head(skb, DSA_HLEN) < 0) - goto out_free; + return NULL; skb_push(skb, DSA_HLEN); memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); @@ -62,10 +62,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) } return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, @@ -79,7 +75,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_port; if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) - goto out_drop; + return NULL; /* * The ethertype field is part of the DSA header. @@ -90,7 +86,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, * Check that frame type is either TO_CPU or FORWARD. */ if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0) - goto out_drop; + return NULL; /* * Determine source device and port. @@ -103,14 +99,14 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, * port is a registered DSA port. */ if (source_device >= DSA_MAX_SWITCHES) - goto out_drop; + return NULL; ds = dst->ds[source_device]; if (!ds) - goto out_drop; + return NULL; if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) - goto out_drop; + return NULL; /* * Convert the DSA header to an 802.1q header if the 'tagged' @@ -161,9 +157,6 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops dsa_netdev_ops = { diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index d9c668aa5e54..67a9d26f9075 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -11,7 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" #define DSA_HLEN 4 @@ -30,7 +30,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) */ if (skb->protocol == htons(ETH_P_8021Q)) { if (skb_cow_head(skb, DSA_HLEN) < 0) - goto out_free; + return NULL; skb_push(skb, DSA_HLEN); memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); @@ -55,7 +55,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) } } else { if (skb_cow_head(skb, EDSA_HLEN) < 0) - goto out_free; + return NULL; skb_push(skb, EDSA_HLEN); memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN); @@ -75,10 +75,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) } return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, @@ -92,7 +88,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_port; if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) - goto out_drop; + return NULL; /* * Skip the two null bytes after the ethertype. @@ -103,7 +99,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, * Check that frame type is either TO_CPU or FORWARD. */ if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0) - goto out_drop; + return NULL; /* * Determine source device and port. @@ -116,14 +112,14 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, * port is a registered DSA port. 
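 * (source_device indexes the tree's dst->ds[] array and source_port * indexes that switch's ports[], as the checks below enforce).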
*/ if (source_device >= DSA_MAX_SWITCHES) - goto out_drop; + return NULL; ds = dst->ds[source_device]; if (!ds) - goto out_drop; + return NULL; if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) - goto out_drop; + return NULL; /* * If the 'tagged' bit is set, convert the DSA tag to a 802.1q @@ -180,9 +176,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops edsa_netdev_ops = { diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c new file mode 100644 index 000000000000..fab41de8e983 --- /dev/null +++ b/net/dsa/tag_ksz.c @@ -0,0 +1,99 @@ +/* + * net/dsa/tag_ksz.c - Microchip KSZ Switch tag format handling + * Copyright (c) 2017 Microchip Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <net/dsa.h> +#include "dsa_priv.h" + +/* For Ingress (Host -> KSZ), 2 bytes are added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag0 : Prioritization (not used now) + * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5) + * + * For Egress (KSZ -> Host), 1 byte is added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag0 : zero-based value represents port + * (eg, 0x00=port1, 0x02=port3, 0x06=port7) + */ + +#define KSZ_INGRESS_TAG_LEN 2 +#define KSZ_EGRESS_TAG_LEN 1 + +static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct sk_buff *nskb; + int padlen; + u8 *tag; + + padlen = (skb->len >= ETH_ZLEN) ? 
0 : ETH_ZLEN - skb->len; + + if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) { + nskb = skb; + } else { + nskb = alloc_skb(NET_IP_ALIGN + skb->len + + padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC); + if (!nskb) + return NULL; + skb_reserve(nskb, NET_IP_ALIGN); + + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, + skb_network_header(skb) - skb->head); + skb_set_transport_header(nskb, + skb_transport_header(skb) - skb->head); + skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); + kfree_skb(skb); + } + + /* skb_put_padto() already frees nskb on error */ + if (skb_put_padto(nskb, nskb->len + padlen)) + return NULL; + + tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); + tag[0] = 0; + tag[1] = 1 << p->dp->index; /* destination port */ + + return nskb; +} + +static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; + u8 *tag; + int source_port; + + tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + + source_port = tag[0] & 7; + if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + return NULL; + + pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN); + + skb->dev = ds->ports[source_port].netdev; + + return skb; +} + +const struct dsa_device_ops ksz_netdev_ops = { + .xmit = ksz_xmit, + .rcv = ksz_rcv, +}; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 70130ed5c21a..247774d149f9 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -14,7 +14,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" /* To define the outgoing port and to discover the incoming port a regular @@ -52,7 +52,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) { dev_dbg(&dev->dev, "Cannot make room for the special tag.
Dropping packet\n"); - goto out_free; + return NULL; } /* provide 'LAN9303_TAG_LEN' bytes additional space */ @@ -66,9 +66,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) lan9303_tag[1] = htons(p->dp->index | BIT(4)); return skb; -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 837cdddb53f0..2f32b7ea3365 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -13,7 +13,7 @@ */ #include <linux/etherdevice.h> -#include <net/dsa.h> + #include "dsa_priv.h" #define MTK_HDR_LEN 4 @@ -27,7 +27,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, u8 *mtk_tag; if (skb_cow_head(skb, MTK_HDR_LEN) < 0) - goto out_free; + return NULL; skb_push(skb, MTK_HDR_LEN); @@ -41,10 +41,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, mtk_tag[3] = 0; return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, @@ -57,7 +53,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, __be16 *phdr, hdr; if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) - goto out_drop; + return NULL; /* The MTK header is added by the switch between src addr * and ethertype at this point, skb->data points to 2 bytes @@ -79,19 +75,16 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, */ ds = dst->ds[0]; if (!ds) - goto out_drop; + return NULL; /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); if (!ds->ports[port].netdev) - goto out_drop; + return NULL; skb->dev = ds->ports[port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops mtk_netdev_ops = { diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 3ba3f59f7a34..1867a3d11f28 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -12,7 +12,7 @@ */ #include <linux/etherdevice.h> -#include <net/dsa.h> + #include "dsa_priv.h" #define QCA_HDR_LEN 2 @@ -45,7 +45,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; if (skb_cow_head(skb, 0) < 0) - goto out_free; + return NULL; skb_push(skb, QCA_HDR_LEN); @@ -60,10 +60,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) *phdr = htons(hdr); return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, @@ -71,13 +67,14 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); struct dsa_switch *ds; u8 ver; int port; __be16 *phdr, hdr; if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) - goto out_drop; + return NULL; /* The QCA header is added by the switch between src addr and Ethertype * At this point, skb->data points to ethertype so header should be @@ -89,7 +86,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Make sure the version is correct */ ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S; if (unlikely(ver != QCA_HDR_VERSION)) - goto out_drop; + return NULL; /* Remove QCA tag and recalculate checksum */ skb_pull_rcsum(skb, QCA_HDR_LEN); @@ -99,22 +96,19 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* This protocol doesn't support 
cascading multiple switches so it's * safe to assume the switch is first in the tree */ - ds = dst->cpu_switch; + ds = cpu_dp->ds; if (!ds) - goto out_drop; + return NULL; /* Get source port information */ port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); if (!ds->ports[port].netdev) - goto out_drop; + return NULL; /* Update skb & forward the frame accordingly */ skb->dev = ds->ports[port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops qca_netdev_ops = { diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index aafc2fc74c30..172f13167896 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -11,7 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) @@ -32,10 +32,8 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) padlen = 60 - skb->len; nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC); - if (nskb == NULL) { - kfree_skb(skb); + if (!nskb) return NULL; - } skb_reserve(nskb, NET_IP_ALIGN); skb_reset_mac_header(nskb); @@ -63,32 +61,28 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; u8 *trailer; int source_port; - ds = dst->cpu_switch; - if (skb_linearize(skb)) - goto out_drop; + return NULL; trailer = skb_tail_pointer(skb) - 4; if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 || (trailer[2] & 0xef) != 0x00 || trailer[3] != 0x00) - goto out_drop; + return NULL; source_port = trailer[1] & 7; if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) - goto out_drop; + return NULL; pskb_trim_rcsum(skb, skb->len - 4); skb->dev = ds->ports[source_port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops trailer_netdev_ops = { diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index eedba7670b51..a60658c85a9a 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -301,15 +301,14 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out_skb; skb->dev = dev; - skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); - dev_put(dev); - err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); + dev_put(dev); + return err ?: size; out_skb: @@ -690,15 +689,14 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out_skb; skb->dev = dev; - skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); - dev_put(dev); - err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); + dev_put(dev); + return err ?: size; out_skb: diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 22377c8ff14b..e8f862358518 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -220,7 +220,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -393,7 +395,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, ihl); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, 
sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index e9f3386a528b..a651c53260ec 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1113,13 +1113,17 @@ static int arp_invalidate(struct net_device *dev, __be32 ip) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; + struct neigh_table *tbl = &arp_tbl; if (neigh) { if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); + write_lock_bh(&tbl->lock); neigh_release(neigh); + neigh_remove_one(neigh, tbl); + write_unlock_bh(&tbl->lock); } return err; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index df14815a3b8c..a7dd088d5fc9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -176,6 +176,7 @@ EXPORT_SYMBOL(__ip_dev_find); static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); +static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); #ifdef CONFIG_SYSCTL @@ -441,6 +442,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; + struct in_validator_info ivi; + int ret; ASSERT_RTNL(); @@ -471,6 +474,23 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, } } + /* Allow any devices that wish to register ifaddr validators to weigh + * in now, before changes are committed. The rtnl lock is serializing + * access here, so the state should not change between a validator call + * and a final notify on commit. This isn't invoked on promotion under + * the assumption that validators are checking the address itself, and + * not the flags. + */ + ivi.ivi_addr = ifa->ifa_address; + ivi.ivi_dev = ifa->ifa_dev; + ret = blocking_notifier_call_chain(&inetaddr_validator_chain, + NETDEV_UP, &ivi); + ret = notifier_to_errno(ret); + if (ret) { + inet_free_ifa(ifa); + return ret; + } + if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { prandom_seed((__force u32) ifa->ifa_local); ifap = last_primary; @@ -1356,6 +1376,19 @@ int unregister_inetaddr_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_inetaddr_notifier); +int register_inetaddr_validator_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&inetaddr_validator_chain, nb); +} +EXPORT_SYMBOL(register_inetaddr_validator_notifier); + +int unregister_inetaddr_validator_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&inetaddr_validator_chain, + nb); +} +EXPORT_SYMBOL(unregister_inetaddr_validator_notifier); + /* Rename ifa_labels for a device name change. Make some effort to preserve * existing alias numbering and to create unique labels if possible.
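 * (e.g. "eth0:1" becomes "eth1:1" when eth0 is renamed to eth1).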
*/ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 93322f895eab..d815d1755473 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -377,9 +377,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * esp->esph = esph; sg_init_table(sg, esp->nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + esp->clen + alen); + err = skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + if (unlikely(err < 0)) + goto error; if (!esp->inplace) { int allocsize; @@ -403,9 +405,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * spin_unlock_bh(&x->lock); sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); - skb_to_sgvec(skb, dsg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + esp->clen + alen); + err = skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + if (unlikely(err < 0)) + goto error; } if ((x->props.flags & XFRM_STATE_ESN)) @@ -690,7 +694,9 @@ skip_cow: esp_input_set_header(skb, seqhi); sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + err = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 83e3ed258467..4e678fa892dd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -588,13 +588,15 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) - err = fib_table_delete(net, tb, &cfg); + err = fib_table_delete(net, tb, &cfg, + NULL); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) - err = fib_table_insert(net, tb, &cfg); + err = fib_table_insert(net, tb, + &cfg, NULL); else err = -ENOBUFS; } @@ -626,14 +628,15 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_config *cfg) + struct nlmsghdr *nlh, struct fib_config *cfg, + struct netlink_ext_ack *extack) { struct nlattr *attr; int err, remaining; struct rtmsg *rtm; err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy, - NULL); + extack); if (err < 0) goto errout; @@ -654,6 +657,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_nlinfo.nl_net = net; if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); err = -EINVAL; goto errout; } @@ -681,7 +685,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, break; case RTA_MULTIPATH: err = lwtunnel_valid_encap_type_attr(nla_data(attr), - nla_len(attr)); + nla_len(attr), + extack); if (err < 0) goto errout; cfg->fc_mp = nla_data(attr); @@ -698,7 +703,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, break; case RTA_ENCAP_TYPE: cfg->fc_encap_type = nla_get_u16(attr); - err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type, + extack); if (err < 0) goto errout; break; @@ -718,17 +724,18 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_table *tb; int err; - err = rtm_to_fib_config(net, skb, nlh, &cfg); + err = rtm_to_fib_config(net, skb, nlh, &cfg, extack); if (err < 0) goto errout; tb = fib_get_table(net, cfg.fc_table); if (!tb) { + NL_SET_ERR_MSG(extack, "FIB table does not exist"); err = -ESRCH; goto errout; } - err = fib_table_delete(net, 
- err = fib_table_delete(net, tb, &cfg); + err = fib_table_delete(net, tb, &cfg, extack); errout: return err; } @@ -741,7 +748,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_table *tb; int err; - err = rtm_to_fib_config(net, skb, nlh, &cfg); + err = rtm_to_fib_config(net, skb, nlh, &cfg, extack); if (err < 0) goto errout; @@ -751,7 +758,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = fib_table_insert(net, tb, &cfg); + err = fib_table_insert(net, tb, &cfg, extack); errout: return err; } @@ -845,9 +852,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - fib_table_insert(net, tb, &cfg); + fib_table_insert(net, tb, &cfg, NULL); else - fib_table_delete(net, tb, &cfg); + fib_table_delete(net, tb, &cfg, NULL); } void fib_add_ifaddr(struct in_ifaddr *ifa) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 9c02920725db..769ab87ebc4b 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -28,8 +28,10 @@ static inline void fib_alias_accessed(struct fib_alias *fa) /* Exported by fib_semantics.c */ void fib_release_info(struct fib_info *); -struct fib_info *fib_create_info(struct fib_config *cfg); -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); +struct fib_info *fib_create_info(struct fib_config *cfg, + struct netlink_ext_ack *extack); +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, + struct netlink_ext_ack *extack); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ad9ad4aab5da..2157dc08c407 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -32,6 +32,7 @@ #include <linux/skbuff.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/netlink.h> #include <net/arp.h> #include <net/ip.h> @@ -456,7 +457,8 @@ static int fib_detect_death(struct fib_info *fi, int order, #ifdef CONFIG_IP_ROUTE_MULTIPATH -static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) +static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, + struct netlink_ext_ack *extack) { int nhs = 0; @@ -466,22 +468,35 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) } /* leftover implies invalid nexthop configuration, discard it */ - return remaining > 0 ? 
0 : nhs; + if (remaining > 0) { + NL_SET_ERR_MSG(extack, + "Invalid nexthop configuration - extra data after nexthops"); + nhs = 0; + } + + return nhs; } static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, - int remaining, struct fib_config *cfg) + int remaining, struct fib_config *cfg, + struct netlink_ext_ack *extack) { int ret; change_nexthops(fi) { int attrlen; - if (!rtnh_ok(rtnh, remaining)) + if (!rtnh_ok(rtnh, remaining)) { + NL_SET_ERR_MSG(extack, + "Invalid nexthop configuration - extra data after nexthop"); return -EINVAL; + } - if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { + NL_SET_ERR_MSG(extack, + "Invalid flags for nexthop - can not contain DEAD or LINKDOWN"); return -EINVAL; + } nexthop_nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; @@ -507,13 +522,17 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); - if (!nla_entype) + if (!nla_entype) { + NL_SET_BAD_ATTR(extack, nla); + NL_SET_ERR_MSG(extack, + "Encap type is missing"); goto err_inval; + } ret = lwtunnel_build_state(nla_get_u16( nla_entype), nla, AF_INET, cfg, - &lwtstate); + &lwtstate, extack); if (ret) goto errout; nexthop_nh->nh_lwtstate = @@ -595,7 +614,8 @@ static inline void fib_add_weight(struct fib_info *fi, static int fib_encap_match(u16 encap_type, struct nlattr *encap, const struct fib_nh *nh, - const struct fib_config *cfg) + const struct fib_config *cfg, + struct netlink_ext_ack *extack) { struct lwtunnel_state *lwtstate; int ret, result = 0; @@ -603,8 +623,8 @@ static int fib_encap_match(u16 encap_type, if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; - ret = lwtunnel_build_state(encap_type, encap, - AF_INET, cfg, &lwtstate); + ret = lwtunnel_build_state(encap_type, encap, AF_INET, + cfg, &lwtstate, extack); if (!ret) { result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); lwtstate_free(lwtstate); @@ -613,7 +633,8 @@ static int fib_encap_match(u16 encap_type, return result; } -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, + struct netlink_ext_ack *extack) { #ifdef CONFIG_IP_ROUTE_MULTIPATH struct rtnexthop *rtnh; @@ -625,9 +646,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) if (cfg->fc_oif || cfg->fc_gw) { if (cfg->fc_encap) { - if (fib_encap_match(cfg->fc_encap_type, - cfg->fc_encap, fi->fib_nh, cfg)) - return 1; + if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, + fi->fib_nh, cfg, extack)) + return 1; } if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) @@ -716,7 +737,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) * |-> {local prefix} (terminal node) */ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, - struct fib_nh *nh) + struct fib_nh *nh, struct netlink_ext_ack *extack) { int err = 0; struct net *net; @@ -729,16 +750,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (nh->nh_flags & RTNH_F_ONLINK) { unsigned int addr_type; - if (cfg->fc_scope >= RT_SCOPE_LINK) + if (cfg->fc_scope >= RT_SCOPE_LINK) { + NL_SET_ERR_MSG(extack, + "Nexthop has invalid scope"); return -EINVAL; + } dev = __dev_get_by_index(net, nh->nh_oif); if (!dev) return -ENODEV; - if (!(dev->flags & IFF_UP)) + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, + "Nexthop device is not up"); return -ENETDOWN; + } addr_type = 
inet_addr_type_dev_table(net, dev, nh->nh_gw); - if (addr_type != RTN_UNICAST) + if (addr_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, + "Nexthop has invalid gateway"); return -EINVAL; + } if (!netif_carrier_ok(dev)) nh->nh_flags |= RTNH_F_LINKDOWN; nh->nh_dev = dev; @@ -778,18 +808,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, } if (err) { + NL_SET_ERR_MSG(extack, + "Nexthop has invalid gateway"); rcu_read_unlock(); return err; } } err = -EINVAL; - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) { + NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); goto out; + } nh->nh_scope = res.scope; nh->nh_oif = FIB_RES_OIF(res); nh->nh_dev = dev = FIB_RES_DEV(res); - if (!dev) + if (!dev) { + NL_SET_ERR_MSG(extack, + "No egress device for nexthop gateway"); goto out; + } dev_hold(dev); if (!netif_carrier_ok(dev)) nh->nh_flags |= RTNH_F_LINKDOWN; @@ -797,17 +834,21 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, } else { struct in_device *in_dev; - if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) + if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { + NL_SET_ERR_MSG(extack, + "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); return -EINVAL; - + } rcu_read_lock(); err = -ENODEV; in_dev = inetdev_by_index(net, nh->nh_oif); if (!in_dev) goto out; err = -ENETDOWN; - if (!(in_dev->dev->flags & IFF_UP)) + if (!(in_dev->dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); goto out; + } nh->nh_dev = in_dev->dev; dev_hold(nh->nh_dev); nh->nh_scope = RT_SCOPE_HOST; @@ -982,7 +1023,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) return 0; } -struct fib_info *fib_create_info(struct fib_config *cfg) +struct fib_info *fib_create_info(struct fib_config *cfg, + struct netlink_ext_ack *extack) { int err; struct fib_info *fi = NULL; @@ -994,15 +1036,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto err_inval; /* Fast check to catch the most weird cases */ - if (fib_props[cfg->fc_type].scope > cfg->fc_scope) + if (fib_props[cfg->fc_type].scope > cfg->fc_scope) { + NL_SET_ERR_MSG(extack, "Invalid scope"); goto err_inval; + } - if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { + NL_SET_ERR_MSG(extack, + "Invalid rtm_flags - can not contain DEAD or LINKDOWN"); goto err_inval; + } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (cfg->fc_mp) { - nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); + nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); if (nhs == 0) goto err_inval; } @@ -1065,18 +1112,29 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); + err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); if (err != 0) goto failure; - if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) + if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) { + NL_SET_ERR_MSG(extack, + "Nexthop device index does not match RTA_OIF"); goto err_inval; - if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) + } + if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) { + NL_SET_ERR_MSG(extack, + "Nexthop gateway does not match RTA_GATEWAY"); goto err_inval; + } #ifdef CONFIG_IP_ROUTE_CLASSID - if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) + if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { + NL_SET_ERR_MSG(extack, 
+ "Nexthop class id does not match RTA_FLOW"); goto err_inval; + } #endif #else + NL_SET_ERR_MSG(extack, + "Multipath support not enabled in kernel"); goto err_inval; #endif } else { @@ -1085,11 +1143,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; - if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) + if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) { + NL_SET_ERR_MSG(extack, + "LWT encap type not specified"); goto err_inval; + } err = lwtunnel_build_state(cfg->fc_encap_type, cfg->fc_encap, AF_INET, cfg, - &lwtstate); + &lwtstate, extack); if (err) goto failure; @@ -1109,8 +1170,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } if (fib_props[cfg->fc_type].error) { - if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) + if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) { + NL_SET_ERR_MSG(extack, + "Gateway, device and multipath can not be specified for this route type"); goto err_inval; + } goto link_it; } else { switch (cfg->fc_type) { @@ -1121,19 +1185,30 @@ struct fib_info *fib_create_info(struct fib_config *cfg) case RTN_MULTICAST: break; default: + NL_SET_ERR_MSG(extack, "Invalid route type"); goto err_inval; } } - if (cfg->fc_scope > RT_SCOPE_HOST) + if (cfg->fc_scope > RT_SCOPE_HOST) { + NL_SET_ERR_MSG(extack, "Invalid scope"); goto err_inval; + } if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ - if (nhs != 1 || nh->nh_gw) + if (nhs != 1) { + NL_SET_ERR_MSG(extack, + "Route with host scope can not have multiple nexthops"); + goto err_inval; + } + if (nh->nh_gw) { + NL_SET_ERR_MSG(extack, + "Route with host scope can not have a gateway"); goto err_inval; + } nh->nh_scope = RT_SCOPE_NOWHERE; nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; @@ -1143,7 +1218,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) int linkdown = 0; change_nexthops(fi) { - err = fib_check_nh(cfg, fi, nexthop_nh); + err = fib_check_nh(cfg, fi, nexthop_nh, extack); if (err != 0) goto failure; if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) @@ -1153,8 +1228,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_flags |= RTNH_F_LINKDOWN; } - if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) + if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) { + NL_SET_ERR_MSG(extack, "Invalid prefsrc address"); goto err_inval; + } change_nexthops(fi) { fib_info_update_nh_saddr(net, nexthop_nh); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 51182ff2b441..d56659e97a6e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1099,9 +1099,25 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, return 0; } +static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack) +{ + if (plen > KEYLENGTH) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + return false; + } + + if ((plen < KEYLENGTH) && (key << plen)) { + NL_SET_ERR_MSG(extack, + "Invalid prefix for given prefix length"); + return false; + } + + return true; +} + /* Caller must hold RTNL. 
/* Caller must hold RTNL. */ int fib_table_insert(struct net *net, struct fib_table *tb, - struct fib_config *cfg) + struct fib_config *cfg, struct netlink_ext_ack *extack) { enum fib_event_type event = FIB_EVENT_ENTRY_ADD; struct trie *t = (struct trie *)tb->tb_data; @@ -1115,17 +1131,14 @@ int fib_table_insert(struct net *net, struct fib_table *tb, u32 key; int err; - if (plen > KEYLENGTH) - return -EINVAL; - key = ntohl(cfg->fc_dst); - pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); - - if ((plen < KEYLENGTH) && (key << plen)) + if (!fib_valid_key_len(key, plen, extack)) return -EINVAL; - fi = fib_create_info(cfg); + pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); + + fi = fib_create_info(cfg, extack); if (IS_ERR(fi)) { err = PTR_ERR(fi); goto err; } @@ -1452,6 +1465,7 @@ found: if (!(fib_flags & FIB_LOOKUP_NOREF)) atomic_inc(&fi->fib_clntref); + res->prefix = htonl(n->key); res->prefixlen = KEYLENGTH - fa->fa_slen; res->nh_sel = nhsel; res->type = fa->fa_type; @@ -1507,7 +1521,7 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, /* Caller must hold RTNL. */ int fib_table_delete(struct net *net, struct fib_table *tb, - struct fib_config *cfg) + struct fib_config *cfg, struct netlink_ext_ack *extack) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *fa_to_delete; @@ -1517,12 +1531,9 @@ int fib_table_delete(struct net *net, struct fib_table *tb, u8 tos = cfg->fc_tos; u32 key; - if (plen > KEYLENGTH) - return -EINVAL; - key = ntohl(cfg->fc_dst); - if ((plen < KEYLENGTH) && (key << plen)) + if (!fib_valid_key_len(key, plen, extack)) return -EINVAL; l = fib_find_node(t, &tp, key); @@ -1551,7 +1562,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi) == 0) { + fib_nh_match(cfg, fi, extack) == 0) { fa_to_delete = fa; break; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 805f6607f8d9..8e0257d01200 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <net/genetlink.h> #include <net/gue.h> +#include <net/fou.h> #include <net/ip.h> #include <net/protocol.h> #include <net/udp.h> @@ -859,25 +860,6 @@ size_t gue_encap_hlen(struct ip_tunnel_encap *e) } EXPORT_SYMBOL(gue_encap_hlen); -static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, - struct flowi4 *fl4, u8 *protocol, __be16 sport) -{ - struct udphdr *uh; - - skb_push(skb, sizeof(struct udphdr)); - skb_reset_transport_header(skb); - - uh = udp_hdr(skb); - - uh->dest = e->dport; - uh->source = sport; - uh->len = htons(skb->len); - udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, - fl4->saddr, fl4->daddr, skb->len); - - *protocol = IPPROTO_UDP; -} - int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, __be16 *sport, int type) { @@ -894,24 +876,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } EXPORT_SYMBOL(__fou_build_header); -int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4) -{ - int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? 
SKB_GSO_UDP_TUNNEL_CSUM : - SKB_GSO_UDP_TUNNEL; - __be16 sport; - int err; - - err = __fou_build_header(skb, e, protocol, &sport, type); - if (err) - return err; - - fou_build_udp(skb, e, fl4, protocol, sport); - - return 0; -} -EXPORT_SYMBOL(fou_build_header); - int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, __be16 *sport, int type) { @@ -985,8 +949,46 @@ int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } EXPORT_SYMBOL(__gue_build_header); -int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4) +#ifdef CONFIG_NET_FOU_IP_TUNNELS + +static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, + struct flowi4 *fl4, u8 *protocol, __be16 sport) +{ + struct udphdr *uh; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + uh = udp_hdr(skb); + + uh->dest = e->dport; + uh->source = sport; + uh->len = htons(skb->len); + udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, + fl4->saddr, fl4->daddr, skb->len); + + *protocol = IPPROTO_UDP; +} + +static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4) +{ + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; + __be16 sport; + int err; + + err = __fou_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + + fou_build_udp(skb, e, fl4, protocol, sport); + + return 0; +} + +static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4) { int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; @@ -1001,9 +1003,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return 0; } -EXPORT_SYMBOL(gue_build_header); -#ifdef CONFIG_NET_FOU_IP_TUNNELS static const struct ip_tunnel_encap_ops fou_iptun_ops = { .encap_hlen = fou_encap_hlen, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 9144fa7df2ad..c2be26b98b5f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -489,7 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key_hash(net, fl4, skb_in); + rt = ip_route_output_key_hash(net, fl4, skb_in); if (IS_ERR(rt)) return rt; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1054d330bf9d..a3fa1a5b6d98 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -25,6 +25,7 @@ #include <net/xfrm.h> #include <net/tcp.h> #include <net/sock_reuseport.h> +#include <net/addrconf.h> #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -789,7 +790,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); - newsk->sk_write_space = sk_stream_write_space; /* listeners have SOCK_RCU_FREE, not the children */ sock_reset_flag(newsk, SOCK_RCU_FREE); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index baf196eaf1d8..90e11479c725 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -228,14 +228,16 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { static int ip_tun_build_state(struct nlattr *attr, unsigned int family, 
const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct ip_tunnel_info *tun_info; struct lwtunnel_state *new_state; struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL); + err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, + extack); if (err < 0) return err; @@ -325,7 +327,8 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { static int ip6_tun_build_state(struct nlattr *attr, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct ip_tunnel_info *tun_info; struct lwtunnel_state *new_state; @@ -333,7 +336,7 @@ static int ip6_tun_build_state(struct nlattr *attr, int err; err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy, - NULL); + extack); if (err < 0) return err; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8ae425cad818..a1199895b8a6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2526,6 +2526,129 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, return ipmr_mfc_delete(tbl, &mfcc, parent); } +static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) +{ + u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); + + if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || + nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || + nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, + mrt->mroute_reg_vif_num) || + nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, + mrt->mroute_do_assert) || + nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim)) + return false; + + return true; +} + +static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) +{ + struct nlattr *vif_nest; + struct vif_device *vif; + + /* if the VIF doesn't exist just continue */ + if (!VIF_EXISTS(mrt, vifid)) + return true; + + vif = &mrt->vif_table[vifid]; + vif_nest = nla_nest_start(skb, IPMRA_VIF); + if (!vif_nest) + return false; + if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || + nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || + nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, + IPMRA_VIFA_PAD) || + nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || + nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { + nla_nest_cancel(skb, vif_nest); + return false; + } + nla_nest_end(skb, vif_nest); + + return true; +} + +static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct nlmsghdr *nlh = NULL; + unsigned int t = 0, s_t; + unsigned int e = 0, s_e; + struct mr_table *mrt; + + s_t = cb->args[0]; + s_e = cb->args[1]; + + ipmr_for_each_table(mrt, net) { + struct nlattr *vifs, *af; + struct ifinfomsg *hdr; + u32 i; + + if (t < s_t) + goto skip_table; + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWLINK, + sizeof(*hdr), NLM_F_MULTI); + if (!nlh) + break; + + hdr = nlmsg_data(nlh); + memset(hdr, 0, sizeof(*hdr)); + hdr->ifi_family = RTNL_FAMILY_IPMR; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) { + nlmsg_cancel(skb, nlh); + goto out; + } + + if 
(!ipmr_fill_table(mrt, skb)) { + nlmsg_cancel(skb, nlh); + goto out; + } + + vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); + if (!vifs) { + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); + goto out; + } + for (i = 0; i < mrt->maxvif; i++) { + if (e < s_e) + goto skip_entry; + if (!ipmr_fill_vif(mrt, i, skb)) { + nla_nest_end(skb, vifs); + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); + goto out; + } +skip_entry: + e++; + } + s_e = 0; + e = 0; + nla_nest_end(skb, vifs); + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); +skip_table: + t++; + } + +out: + cb->args[1] = e; + cb->args[0] = t; + + return skb->len; +} + #ifdef CONFIG_PROC_FS /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif @@ -2868,6 +2991,9 @@ int __init ip_mr_init(void) ipmr_rtm_route, NULL, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, ipmr_rtm_route, NULL, NULL); + + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, + NULL, ipmr_rtm_dumplink, NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 7cd8d0d918f8..6f8d9e5e062b 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -172,7 +172,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) struct iphdr *iph = ip_hdr(skb_in); u8 proto; - if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET)) + if (iph->frag_off & htons(IP_OFFSET)) return; if (skb_csum_unnecessary(skb_in)) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index fa44e752a9a3..43eb6567b3a0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -250,6 +250,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER), SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED), SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES), + SNMP_MIB_ITEM("TCPMemoryPressuresChrono", LINUX_MIB_TCPMEMORYPRESSURESCHRONO), SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD), SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6883b3d4ba8f..9b38cf18144e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -114,6 +114,8 @@ #include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include "fib_lookup.h" + #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) @@ -1860,9 +1862,9 @@ static int ip_mkroute_input(struct sk_buff *skb, */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) + u8 tos, struct net_device *dev, + struct fib_result *res) { - struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); struct ip_tunnel_info *tun_info; struct flowi4 fl4; @@ -1892,8 +1894,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; - res.fi = NULL; - res.table = NULL; + res->fi = NULL; + res->table = NULL; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) goto brd_input; @@ -1929,17 +1931,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); - err = fib_lookup(net, &fl4, &res, 0); + err = fib_lookup(net, &fl4, res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) err = -EHOSTUNREACH; goto no_route; }
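The route.c changes that continue below convert ip_route_input_slow() and the output path to take a caller-supplied struct fib_result and to run under the caller's rcu_read_lock(), so inet_rtm_getroute() can keep using the lookup result (for RTM_F_FIB_MATCH reporting via fib_dump_info()) after the helper returns. A small, runnable userspace analogue of that ownership pattern, with a rwlock standing in for RCU and a stub lookup standing in for the FIB; all names here are illustrative:

#include <pthread.h>
#include <stdio.h>

struct result {			/* stand-in for struct fib_result */
	int prefixlen;
	const char *dev;
};

static pthread_rwlock_t tbl_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Like ip_route_input_rcu(): assumes the caller holds the read lock and
 * writes the lookup result into caller-owned storage. */
static int lookup_rcu(int key, struct result *res)
{
	res->prefixlen = 24;
	res->dev = key ? "eth0" : "lo";
	return 0;
}

int main(void)
{
	struct result res;

	pthread_rwlock_rdlock(&tbl_lock);	/* rcu_read_lock() stand-in */
	if (lookup_rcu(1, &res) == 0)		/* result stays valid here */
		printf("matched /%d via %s\n", res.prefixlen, res.dev);
	pthread_rwlock_unlock(&tbl_lock);	/* rcu_read_unlock() */
	return 0;
}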
- if (res.type == RTN_BROADCAST) + if (res->type == RTN_BROADCAST) goto brd_input; - if (res.type == RTN_LOCAL) { + if (res->type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &itag); if (err < 0) @@ -1951,10 +1953,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = -EHOSTUNREACH; goto no_route; } - if (res.type != RTN_UNICAST) + if (res->type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos); out: return err; brd_input: @@ -1968,14 +1970,14 @@ brd_input: goto martian_source; } flags |= RTCF_BROADCAST; - res.type = RTN_BROADCAST; + res->type = RTN_BROADCAST; RT_CACHE_STAT_INC(in_brd); local_input: do_cache = false; - if (res.fi) { + if (res->fi) { if (!itag) { - rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -1986,7 +1988,7 @@ local_input: } rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, - flags | RTCF_LOCAL, res.type, + flags | RTCF_LOCAL, res->type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; @@ -1996,18 +1998,18 @@ local_input: rth->dst.tclassid = itag; #endif rth->rt_is_input = 1; - if (res.table) - rth->rt_table_id = res.table->tb_id; + if (res->table) + rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); - if (res.type == RTN_UNREACHABLE) { + if (res->type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } if (do_cache) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh *nh = &FIB_RES_NH(*res); rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { @@ -2027,9 +2029,9 @@ local_input: no_route: RT_CACHE_STAT_INC(in_no_route); - res.type = RTN_UNREACHABLE; - res.fi = NULL; - res.table = NULL; + res->type = RTN_UNREACHABLE; + res->fi = NULL; + res->table = NULL; goto local_input; /* @@ -2059,11 +2061,22 @@ martian_source: int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { - int res; + struct fib_result res; + int err; tos &= IPTOS_RT_MASK; rcu_read_lock(); + err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(ip_route_input_noref); + +/* called with rcu_read_lock held */ +int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, struct fib_result *res) +{ /* Multicast recognition logic is moved from route cache to here. 
The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting @@ -2078,6 +2091,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_multicast(daddr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); int our = 0; + int err = -EINVAL; if (in_dev) our = ip_check_mc_rcu(in_dev, daddr, saddr, @@ -2093,7 +2107,6 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, ip_hdr(skb)->protocol); } - res = -EINVAL; if (our #ifdef CONFIG_IP_MROUTE || @@ -2101,17 +2114,14 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, IN_DEV_MFORWARD(in_dev)) #endif ) { - res = ip_route_input_mc(skb, daddr, saddr, + err = ip_route_input_mc(skb, daddr, saddr, tos, dev, our); } - rcu_read_unlock(); - return res; + return err; } - res = ip_route_input_slow(skb, daddr, saddr, tos, dev); - rcu_read_unlock(); - return res; + + return ip_route_input_slow(skb, daddr, saddr, tos, dev, res); } -EXPORT_SYMBOL(ip_route_input_noref); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, @@ -2254,29 +2264,40 @@ add: * Major route resolver routine. */ -struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, - const struct sk_buff *skb) +struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, + const struct sk_buff *skb) { - struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); - unsigned int flags = 0; struct fib_result res; struct rtable *rth; - int orig_oif; - int err = -ENETUNREACH; res.tclassid = 0; res.fi = NULL; res.table = NULL; - orig_oif = fl4->flowi4_oif; - fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? 
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); rcu_read_lock(); + rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); + rcu_read_unlock(); + + return rth; +} +EXPORT_SYMBOL_GPL(ip_route_output_key_hash); + +struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, + struct fib_result *res, + const struct sk_buff *skb) +{ + struct net_device *dev_out = NULL; + int orig_oif = fl4->flowi4_oif; + unsigned int flags = 0; + struct rtable *rth; + int err = -ENETUNREACH; + if (fl4->saddr) { rth = ERR_PTR(-EINVAL); if (ipv4_is_multicast(fl4->saddr) || @@ -2362,15 +2383,15 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; fl4->flowi4_oif = LOOPBACK_IFINDEX; - res.type = RTN_LOCAL; + res->type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } - err = fib_lookup(net, fl4, &res, 0); + err = fib_lookup(net, fl4, res, 0); if (err) { - res.fi = NULL; - res.table = NULL; + res->fi = NULL; + res->table = NULL; if (fl4->flowi4_oif && (ipv4_is_multicast(fl4->daddr) || !netif_index_is_l3_master(net, fl4->flowi4_oif))) { @@ -2395,43 +2416,41 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (fl4->saddr == 0) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); - res.type = RTN_UNICAST; + res->type = RTN_UNICAST; goto make_route; } rth = ERR_PTR(err); goto out; } - if (res.type == RTN_LOCAL) { + if (res->type == RTN_LOCAL) { if (!fl4->saddr) { - if (res.fi->fib_prefsrc) - fl4->saddr = res.fi->fib_prefsrc; + if (res->fi->fib_prefsrc) + fl4->saddr = res->fi->fib_prefsrc; else fl4->saddr = fl4->daddr; } /* L3 master device is the loopback for that domain */ - dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? : + dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? 
: net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; } - fib_select_path(net, &res, fl4, skb); + fib_select_path(net, res, fl4, skb); - dev_out = FIB_RES_DEV(res); + dev_out = FIB_RES_DEV(*res); fl4->flowi4_oif = dev_out->ifindex; make_route: - rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags); + rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); out: - rcu_read_unlock(); return rth; } -EXPORT_SYMBOL_GPL(__ip_route_output_key_hash); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) { @@ -2525,9 +2544,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); +/* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, struct flowi4 *fl4, struct sk_buff *skb, u32 portid, - u32 seq, int event) + u32 seq) { struct rtable *rt = skb_rtable(skb); struct rtmsg *r; @@ -2536,7 +2556,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0); if (!nlh) return -EMSGSIZE; @@ -2644,6 +2664,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; + struct fib_result res = {}; struct rtable *rt = NULL; struct flowi4 fl4; __be32 dst = 0; @@ -2700,10 +2721,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; + rcu_read_lock(); + if (iif) { struct net_device *dev; - dev = __dev_get_by_index(net, iif); + dev = dev_get_by_index_rcu(net, iif); if (!dev) { err = -ENODEV; goto errout_free; @@ -2712,14 +2735,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, skb->protocol = htons(ETH_P_IP); skb->dev = dev; skb->mark = mark; - err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); + err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, + dev, &res); rt = skb_rtable(skb); if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { - rt = ip_route_output_key(net, &fl4); - + rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); err = 0; if (IS_ERR(rt)) err = PTR_ERR(rt); @@ -2735,17 +2758,25 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = rt->rt_table_id; - err = rt_fill_info(net, dst, src, table_id, &fl4, skb, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - RTM_NEWROUTE); + if (rtm->rtm_flags & RTM_F_FIB_MATCH) + err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWROUTE, table_id, + rt->rt_type, res.prefix, res.prefixlen, + fl4.flowi4_tos, res.fi, 0); + else + err = rt_fill_info(net, dst, src, table_id, &fl4, skb, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); if (err < 0) goto errout_free; + rcu_read_unlock(); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; errout_free: + rcu_read_unlock(); kfree_skb(skb); goto errout; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0257d965f111..7835bb4a1fab 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -66,10 +66,10 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, * Since subsequent timestamps use the normal tcp_time_stamp value, we * must make sure that the 
resulting initial timestamp is <= tcp_time_stamp. */ -__u32 cookie_init_timestamp(struct request_sock *req) +u64 cookie_init_timestamp(struct request_sock *req) { struct inet_request_sock *ireq; - u32 ts, ts_now = tcp_time_stamp; + u32 ts, ts_now = tcp_time_stamp_raw(); u32 options = 0; ireq = inet_rsk(req); @@ -88,7 +88,7 @@ __u32 cookie_init_timestamp(struct request_sock *req) ts <<= TSBITS; ts |= options; } - return ts; + return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ); } @@ -232,7 +232,8 @@ EXPORT_SYMBOL(tcp_get_cookie_sock); * return false if we decode a tcp option that is disabled * on the host. */ -bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt) +bool cookie_timestamp_decode(const struct net *net, + struct tcp_options_received *tcp_opt) { /* echoed timestamp, lowest bits contain options */ u32 options = tcp_opt->rcv_tsecr; @@ -242,12 +243,12 @@ bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt) return true; } - if (!sysctl_tcp_timestamps) + if (!net->ipv4.sysctl_tcp_timestamps) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0; - if (tcp_opt->sack_ok && !sysctl_tcp_sack) + if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) @@ -256,7 +257,7 @@ bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt) tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; - return sysctl_tcp_window_scaling != 0; + return net->ipv4.sysctl_tcp_window_scaling != 0; } EXPORT_SYMBOL(cookie_timestamp_decode); @@ -312,14 +313,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + tsoff = secure_tcp_ts_off(sock_net(sk), + ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(&tcp_opt)) + if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) goto out; ret = NULL; @@ -343,7 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp;
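For reference, the cookie timestamp trick visible in the syncookies hunks above: the bottom TSBITS bits of the TCP timestamp carry the negotiated options (window scale, SACK, ECN), the rest is the real clock, and with the move to the 64-bit microsecond clock the result is rescaled by USEC_PER_SEC / TCP_TS_HZ. A runnable sketch; TSBITS = 6, TCP_TS_HZ = 1000 (a 1-ms tick) and the TS_OPT_* layout are assumptions taken from the kernel headers, not from this diff:

#include <stdint.h>
#include <stdio.h>

#define TSBITS			6
#define TS_OPT_WSCALE_MASK	0xf
#define TS_OPT_SACK		(1 << 4)
#define USEC_PER_SEC		1000000
#define TCP_TS_HZ		1000	/* assumed 1-ms timestamp tick */

static uint64_t cookie_ts(uint32_t ts_now, uint32_t options)
{
	uint32_t ts = ts_now;

	ts >>= TSBITS;			/* make room in the low bits ... */
	ts <<= TSBITS;
	ts |= options;			/* ... and stash the options there */
	return (uint64_t)ts * (USEC_PER_SEC / TCP_TS_HZ);
}

int main(void)
{
	uint64_t v = cookie_ts(0x12345678, TS_OPT_SACK | 7);
	uint32_t ts = (uint32_t)(v / (USEC_PER_SEC / TCP_TS_HZ));

	/* the peer echoes v back; the low bits decode the options */
	printf("wscale=%u sack=%d\n", ts & TS_OPT_WSCALE_MASK,
	       !!(ts & TS_OPT_SACK));
	return 0;
}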
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack.v64 = 0; + treq->snt_synack = 0; treq->tfo_listener = false; ireq->ir_iif = inet_request_bound_dev_if(sk, skb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 86957e9cd6c6..7065234a89a5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -365,27 +365,6 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, static struct ctl_table ipv4_table[] = { { - .procname = "tcp_timestamps", - .data = &sysctl_tcp_timestamps, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_window_scaling", - .data = &sysctl_tcp_window_scaling, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_sack", - .data = &sysctl_tcp_sack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_retrans_collapse", .data = &sysctl_tcp_retrans_collapse, .maxlen = sizeof(int), @@ -1116,6 +1095,27 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = &one, }, #endif + { + .procname = "tcp_sack", + .data = &init_net.ipv4.sysctl_tcp_sack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_window_scaling", + .data = &init_net.ipv4.sysctl_tcp_window_scaling, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_timestamps", + .data = &init_net.ipv4.sysctl_tcp_timestamps, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b5ea036ca781..cc8fd8b747a4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -320,17 +320,36 @@ struct tcp_splice_state { * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ -int tcp_memory_pressure __read_mostly; -EXPORT_SYMBOL(tcp_memory_pressure); +unsigned long tcp_memory_pressure __read_mostly; +EXPORT_SYMBOL_GPL(tcp_memory_pressure); void tcp_enter_memory_pressure(struct sock *sk) { - if (!tcp_memory_pressure) { + unsigned long val; + + if (tcp_memory_pressure) + return; + val = jiffies; + + if (!val) + val--; + if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); - tcp_memory_pressure = 1; - } } -EXPORT_SYMBOL(tcp_enter_memory_pressure); +EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure); + +void tcp_leave_memory_pressure(struct sock *sk) +{ + unsigned long val; + + if (!tcp_memory_pressure) + return; + val = xchg(&tcp_memory_pressure, 0); + if (val) + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, + jiffies_to_msecs(jiffies - val)); +} +EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) @@ -386,7 +405,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U); + minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -2186,7 +2205,7 @@ adjudge_to_death: /* Now socket is owned by kernel and we acquire BH lock - to finish close. No need to check for user refs. + * to finish close. No need to check for user refs. 
*/ local_bh_disable(); bh_lock_sock(sk); @@ -2480,7 +2499,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_MAXSEG: /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet - * know which interface is going to be used */ + * know which interface is going to be used + */ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) { err = -EINVAL; break; @@ -2715,7 +2735,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (!tp->repair) err = -EPERM; else - tp->tsoffset = val - tcp_time_stamp; + tp->tsoffset = val - tcp_time_stamp_raw(); break; case TCP_REPAIR_WINDOW: err = tcp_repair_set_window(tp, optval, optlen); @@ -2766,7 +2786,7 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) { stats[i] = tp->chrono_stat[i - 1]; if (i == tp->chrono_type) - stats[i] += tcp_time_stamp - tp->chrono_start; + stats[i] += tcp_jiffies32 - tp->chrono_start; stats[i] *= USEC_PER_SEC / HZ; total += stats[i]; } @@ -2850,7 +2870,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; - now = tcp_time_stamp; + now = tcp_jiffies32; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); @@ -3081,7 +3101,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_TIMESTAMP: - val = tcp_time_stamp + tp->tsoffset; + val = tcp_time_stamp_raw() + tp->tsoffset; break; case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index b89bce4c721e..dbcc9352a48f 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -52,10 +52,9 @@ * There is a public e-mail list for discussing BBR development and testing: * https://groups.google.com/forum/#!forum/bbr-dev * - * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, - * since pacing is integral to the BBR design and implementation. - * BBR without pacing would not function properly, and may incur unnecessary - * high packet loss rates. + * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled, + * otherwise TCP stack falls back to an internal pacing using one high + * resolution timer per TCP socket and may use more resources. */ #include <linux/module.h> #include <net/tcp.h> @@ -92,7 +91,7 @@ struct bbr { struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ u32 rtt_cnt; /* count of packet-timed rounds elapsed */ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ - struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */ + u64 cycle_mstamp; /* time of this cycle phase start */ u32 mode:3, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ packet_conservation:1, /* use packet conservation? 
*/ @@ -412,7 +411,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk, struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); bool is_full_length = - skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) > + tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > bbr->min_rtt_us; u32 inflight, bw; @@ -498,7 +497,7 @@ static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies; + bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); bbr->lt_last_delivered = tp->delivered; bbr->lt_last_lost = tp->lost; bbr->lt_rtt_cnt = 0; @@ -552,7 +551,7 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) struct bbr *bbr = inet_csk_ca(sk); u32 lost, delivered; u64 bw; - s32 t; + u32 t; if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ if (bbr->mode == BBR_PROBE_BW && bbr->round_start && @@ -604,15 +603,15 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) return; /* Find average delivery rate in this sampling interval. */ - t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp); - if (t < 1) - return; /* interval is less than one jiffy, so wait */ - t = jiffies_to_usecs(t); - /* Interval long enough for jiffies_to_usecs() to return a bogus 0? */ - if (t < 1) { + t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; + if ((s32)t < 1) + return; /* interval is less than one ms, so wait */ + /* Check if can multiply without overflow */ + if (t >= ~0U / USEC_PER_MSEC) { bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ return; } + t *= USEC_PER_MSEC; bw = (u64)delivered * BW_UNIT; do_div(bw, t); bbr_lt_bw_interval_done(sk, bw); @@ -731,12 +730,12 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) bool filter_expired; /* Track min RTT seen in the min_rtt_win_sec filter window: */ - filter_expired = after(tcp_time_stamp, + filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { bbr->min_rtt_us = rs->rtt_us; - bbr->min_rtt_stamp = tcp_time_stamp; + bbr->min_rtt_stamp = tcp_jiffies32; } if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
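The bbr_lt_bw_sampling() hunk above now keeps the long-term sampling interval in milliseconds derived from the microsecond delivered_mstamp, so the old jiffies_to_usecs() round trip is replaced by one explicit overflow check before converting back to microseconds. The same arithmetic in runnable userspace form; BW_UNIT is assumed to be the kernel's 1 << 24 bandwidth scaling (the "pkts/uS << 24" unit from the struct bbr comment), and the sample numbers are invented:

#include <stdint.h>
#include <stdio.h>

#define USEC_PER_MSEC	1000U
#define BW_UNIT		(1 << 24)	/* assumed kernel scaling */

static int lt_bw(uint64_t delivered_mstamp_us, uint32_t last_stamp_ms,
		 uint32_t delivered, uint64_t *bw)
{
	uint32_t t = (uint32_t)(delivered_mstamp_us / USEC_PER_MSEC)
		     - last_stamp_ms;

	if ((int32_t)t < 1)
		return -1;		/* interval under one ms: wait */
	if (t >= ~0U / USEC_PER_MSEC)
		return -1;		/* t * 1000 would overflow: reset */
	t *= USEC_PER_MSEC;		/* back to microseconds */
	*bw = (uint64_t)delivered * BW_UNIT / t;
	return 0;
}

int main(void)
{
	uint64_t bw;

	if (lt_bw(5000000, 2000, 3000, &bw) == 0)	/* 3 s interval */
		printf("bw = %llu (pkts/us << 24)\n",
		       (unsigned long long)bw);
	return 0;
}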
@@ -755,7 +754,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) /* Maintain min packets in flight for max(200 ms, 1 round). */ if (!bbr->probe_rtt_done_stamp && tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { - bbr->probe_rtt_done_stamp = tcp_time_stamp + + bbr->probe_rtt_done_stamp = tcp_jiffies32 + msecs_to_jiffies(bbr_probe_rtt_mode_ms); bbr->probe_rtt_round_done = 0; bbr->next_rtt_delivered = tp->delivered; @@ -763,8 +762,8 @@ if (bbr->round_start) bbr->probe_rtt_round_done = 1; if (bbr->probe_rtt_round_done && - after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) { - bbr->min_rtt_stamp = tcp_time_stamp; + after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) { + bbr->min_rtt_stamp = tcp_jiffies32; bbr->restore_cwnd = 1; /* snap to prior_cwnd */ bbr_reset_mode(sk); } @@ -811,7 +810,7 @@ static void bbr_init(struct sock *sk) bbr->probe_rtt_done_stamp = 0; bbr->probe_rtt_round_done = 0; bbr->min_rtt_us = tcp_min_rtt(tp); - bbr->min_rtt_stamp = tcp_time_stamp; + bbr->min_rtt_stamp = tcp_jiffies32; minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ @@ -826,10 +825,12 @@ static void bbr_init(struct sock *sk) bbr->idle_restart = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; - bbr->cycle_mstamp.v64 = 0; + bbr->cycle_mstamp = 0; bbr->cycle_idx = 0; bbr_reset_lt_bw_sampling(sk); bbr_reset_startup_mode(sk); + + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); } static u32 bbr_sndbuf_expand(struct sock *sk) diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 36087bca9f48..609965f0e298 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -84,14 +84,14 @@ static void bictcp_init(struct sock *sk) static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { if (ca->last_cwnd == cwnd && - (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) + (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; ca->last_cwnd = cwnd; - ca->last_time = tcp_time_stamp; + ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) /* record the beginning of an epoch */ - ca->epoch_start = tcp_time_stamp; + ca->epoch_start = tcp_jiffies32; /* start off normal */ if (cwnd <= low_window) { diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 0683ba447d77..57ae5b5ae643 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -155,7 +155,7 @@ static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { struct bictcp *ca = inet_csk_ca(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_jiffies32; s32 delta; delta = now - tcp_sk(sk)->lsndtime; @@ -231,21 +231,21 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && - (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) + (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; /* The CUBIC function can update ca->cnt at most once per jiffy. * On all cwnd reduction events, ca->epoch_start is set to 0, * which will force a recalculation of ca->cnt. 
*/ - if (ca->epoch_start && tcp_time_stamp == ca->last_time) + if (ca->epoch_start && tcp_jiffies32 == ca->last_time) goto tcp_friendliness; ca->last_cwnd = cwnd; - ca->last_time = tcp_time_stamp; + ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) { - ca->epoch_start = tcp_time_stamp; /* record beginning */ + ca->epoch_start = tcp_jiffies32; /* record beginning */ ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ @@ -276,7 +276,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) * if the cwnd < 1 million packets !!! */ - t = (s32)(tcp_time_stamp - ca->epoch_start); + t = (s32)(tcp_jiffies32 - ca->epoch_start); t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ t <<= BICTCP_HZ; @@ -448,7 +448,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) return; /* Discard delay samples right after fast recovery */ - if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ) return; delay = (sample->rtt_us << 3) / USEC_PER_MSEC; diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4a4d8e76738f..3eb78cde6ff0 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -104,7 +104,7 @@ static void measure_achieved_throughput(struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_jiffies32; if (icsk->icsk_ca_state == TCP_CA_Open) ca->pkts_acked = sample->pkts_acked; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 174d4376baa5..2ab7e2fa9bb9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -76,9 +76,6 @@ #include <asm/unaligned.h> #include <linux/errqueue.h> -int sysctl_tcp_timestamps __read_mostly = 1; -int sysctl_tcp_window_scaling __read_mostly = 1; -int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly; int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; @@ -112,6 +109,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ +#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -441,7 +439,7 @@ void tcp_init_buffer_space(struct sock *sk) tcp_sndbuf_expand(sk); tp->rcvq_space.space = tp->rcv_wnd; - skb_mstamp_get(&tp->tcp_mstamp); + tcp_mstamp_refresh(tp); tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; @@ -463,7 +461,7 @@ void tcp_init_buffer_space(struct sock *sk) tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } /* 5. Recalculate window clamp after socket hit its memory bounds. 
*/ @@ -555,11 +553,11 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) { u32 delta_us; - if (tp->rcv_rtt_est.time.v64 == 0) + if (tp->rcv_rtt_est.time == 0) goto new_measure; if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) return; - delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time); + delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time); tcp_rcv_rtt_update(tp, delta_us, 1); new_measure: @@ -571,13 +569,15 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + if (tp->rx_opt.rcv_tsecr && (TCP_SKB_CB(skb)->end_seq - - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) - tcp_rcv_rtt_update(tp, - jiffies_to_usecs(tcp_time_stamp - - tp->rx_opt.rcv_tsecr), - 0); + TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { + u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + + tcp_rcv_rtt_update(tp, delta_us, 0); + } } /* @@ -590,7 +590,7 @@ void tcp_rcv_space_adjust(struct sock *sk) int time; int copied; - time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time); + time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) return; @@ -672,7 +672,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_rcv_rtt_measure(tp); - now = tcp_time_stamp; + now = tcp_jiffies32; if (!icsk->icsk_ack.ato) { /* The _first_ data packet received, initialize @@ -885,6 +885,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric, struct tcp_sock *tp = tcp_sk(sk); int mib_idx; + if (WARN_ON_ONCE(metric < 0)) + return; + if (metric > tp->reordering) { tp->reordering = min(sysctl_tcp_max_reordering, metric); @@ -1134,8 +1137,8 @@ struct tcp_sacktag_state { * that was SACKed. RTO needs the earliest RTT to stay conservative, * but congestion control should still get an accurate delay signal. 
*/ - struct skb_mstamp first_sackt; - struct skb_mstamp last_sackt; + u64 first_sackt; + u64 last_sackt; struct rate_sample *rate; int flag; }; @@ -1200,7 +1203,7 @@ static u8 tcp_sacktag_one(struct sock *sk, struct tcp_sacktag_state *state, u8 sacked, u32 start_seq, u32 end_seq, int dup_sack, int pcount, - const struct skb_mstamp *xmit_time) + u64 xmit_time) { struct tcp_sock *tp = tcp_sk(sk); int fack_count = state->fack_count; @@ -1242,9 +1245,9 @@ static u8 tcp_sacktag_one(struct sock *sk, state->reord); if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; - if (state->first_sackt.v64 == 0) - state->first_sackt = *xmit_time; - state->last_sackt = *xmit_time; + if (state->first_sackt == 0) + state->first_sackt = xmit_time; + state->last_sackt = xmit_time; } if (sacked & TCPCB_LOST) { @@ -1304,7 +1307,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, - &skb->skb_mstamp); + skb->skb_mstamp); tcp_rate_skb_delivered(sk, skb, state->rate); if (skb == tp->lost_skb_hint) @@ -1356,8 +1359,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_advance_highest_sack(sk, skb); tcp_skb_collapse_tstamp(prev, skb); - if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64)) - TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0; + if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp)) + TCP_SKB_CB(prev)->tx.delivered_mstamp = 0; tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); @@ -1587,7 +1590,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, TCP_SKB_CB(skb)->end_seq, dup_sack, tcp_skb_pcount(skb), - &skb->skb_mstamp); + skb->skb_mstamp); tcp_rate_skb_delivered(sk, skb, state->rate); if (!before(TCP_SKB_CB(skb)->seq, @@ -1954,7 +1957,7 @@ void tcp_enter_loss(struct sock *sk) } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->retrans_out = 0; tp->lost_out = 0; @@ -2383,7 +2386,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) tcp_ecn_withdraw_cwr(tp); } } - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->undo_marker = 0; } @@ -2520,7 +2523,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { tp->snd_cwnd = tp->snd_ssthresh; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } @@ -2590,7 +2593,7 @@ static void tcp_mtup_probe_success(struct sock *sk) tcp_mss_to_mtu(sk, tp->mss_cache) / icsk->icsk_mtup.probe_size; tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; @@ -2911,13 +2914,13 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) struct tcp_sock *tp = tcp_sk(sk); u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; - minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp, + minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, rtt_us ? 
: jiffies_to_usecs(1)); } -static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, - long seq_rtt_us, long sack_rtt_us, - long ca_rtt_us) +static bool tcp_ack_update_rtt(struct sock *sk, const int flag, + long seq_rtt_us, long sack_rtt_us, + long ca_rtt_us, struct rate_sample *rs) { const struct tcp_sock *tp = tcp_sk(sk); @@ -2936,9 +2939,13 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * See draft-ietf-tcplw-high-performance-00, section 3.3. */ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && - flag & FLAG_ACKED) - seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp - - tp->rx_opt.rcv_tsecr); + flag & FLAG_ACKED) { + u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + + seq_rtt_us = ca_rtt_us = delta_us; + } + rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ if (seq_rtt_us < 0) return false; @@ -2958,16 +2965,13 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) { + struct rate_sample rs; long rtt_us = -1L; - if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) { - struct skb_mstamp now; + if (req && !req->num_retrans && tcp_rsk(req)->snt_synack) + rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack); - skb_mstamp_get(&now); - rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack); - } - - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us); + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs); } @@ -2976,7 +2980,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) const struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ca_ops->cong_avoid(sk, ack, acked); - tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; + tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32; } /* Restart timer after forward progress on connection. @@ -3001,14 +3005,14 @@ void tcp_rearm_rto(struct sock *sk) if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); - const u32 rto_time_stamp = - tcp_skb_timestamp(skb) + rto; - s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); - /* delta may not be positive if the socket is locked + u64 rto_time_stamp = skb->skb_mstamp + + jiffies_to_usecs(rto); + s64 delta_us = rto_time_stamp - tp->tcp_mstamp; + /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. 
*/ - if (delta > 0) - rto = delta; + if (delta_us > 0) + rto = usecs_to_jiffies(delta_us); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); @@ -3060,9 +3064,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct skb_mstamp first_ackt, last_ackt; + u64 first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); - struct skb_mstamp *now = &tp->tcp_mstamp; u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; bool fully_acked = true; @@ -3075,7 +3078,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, bool rtt_update; int flag = 0; - first_ackt.v64 = 0; + first_ackt = 0; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); @@ -3106,8 +3109,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_RETRANS_DATA_ACKED; } else if (!(sacked & TCPCB_SACKED_ACKED)) { last_ackt = skb->skb_mstamp; - WARN_ON_ONCE(last_ackt.v64 == 0); - if (!first_ackt.v64) + WARN_ON_ONCE(last_ackt == 0); + if (!first_ackt) first_ackt = last_ackt; last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; @@ -3122,7 +3125,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->delivered += acked_pcount; if (!tcp_skb_spurious_retrans(tp, skb)) tcp_rack_advance(tp, sacked, scb->end_seq, - &skb->skb_mstamp); + skb->skb_mstamp); } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@ -3165,17 +3168,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { - seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt); - ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt); + if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) { + seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); + ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); } - if (sack->first_sackt.v64) { - sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt); - ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt); + if (sack->first_sackt) { + sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt); + ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt); } - sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, - ca_rtt_us); + ca_rtt_us, sack->rate); if (flag & FLAG_ACKED) { tcp_rearm_rto(sk); @@ -3201,7 +3203,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); } else if (skb && rtt_update && sack_rtt_us >= 0 && - sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) { + sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. 
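The RTT-from-timestamps hunks above replace the old jiffies math with an explicit tick-to-microsecond conversion. A minimal sketch of that conversion, with stand-in names rather than the kernel helpers, and assuming the series' 1 ms timestamp tick:

#include <stdint.h>

#define USEC_PER_SEC 1000000u
#define TCP_TS_HZ    1000u /* assumption: timestamp clock ticks at 1 ms */

/* RTT from an echoed timestamp, as in tcp_rcv_rtt_measure_ts(): the
 * 32-bit subtraction wraps safely, then ticks scale to microseconds.
 */
static inline uint32_t ts_echo_rtt_us(uint32_t tsval_now, uint32_t tsecr)
{
        uint32_t delta = tsval_now - tsecr;

        return delta * (USEC_PER_SEC / TCP_TS_HZ);
}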
@@ -3211,7 +3213,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (icsk->icsk_ca_ops->pkts_acked) { struct ack_sample sample = { .pkts_acked = pkts_acked, - .rtt_us = ca_rtt_us, + .rtt_us = sack->rate->rtt_us, .in_flight = last_in_flight }; icsk->icsk_ca_ops->pkts_acked(sk, &sample); @@ -3390,7 +3392,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, u32 *last_oow_ack_time) { if (*last_oow_ack_time) { - s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { NET_INC_STATS(net, mib_idx); @@ -3398,7 +3400,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, } } - *last_oow_ack_time = tcp_time_stamp; + *last_oow_ack_time = tcp_jiffies32; return false; /* not rate-limited: go ahead, send dupack now! */ } @@ -3553,7 +3555,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ - sack_state.first_sackt.v64 = 0; + sack_state.first_sackt = 0; sack_state.rate = &rs; /* We very likely will need to access write queue head. */ @@ -3565,7 +3567,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { - tcp_send_challenge_ack(sk, skb); + if (!(flag & FLAG_NO_CHALLENGE_ACK)) + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -3636,7 +3639,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) */ sk->sk_err_soft = 0; icsk->icsk_probes_out = 0; - tp->rcv_tstamp = tcp_time_stamp; + tp->rcv_tstamp = tcp_jiffies32; if (!prior_packets) goto no_queue; @@ -3718,7 +3721,8 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(const struct sk_buff *skb, +void tcp_parse_options(const struct net *net, + const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) { @@ -3759,7 +3763,7 @@ void tcp_parse_options(const struct sk_buff *skb, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling) { + !estab && net->ipv4.sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { @@ -3775,7 +3779,7 @@ void tcp_parse_options(const struct sk_buff *skb, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps))) { + (!estab && net->ipv4.sysctl_tcp_timestamps))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -3783,7 +3787,7 @@ void tcp_parse_options(const struct sk_buff *skb, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack) { + !estab && net->ipv4.sysctl_tcp_sack) { opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } @@ -3852,7 +3856,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). 
*/ -static bool tcp_fast_parse_options(const struct sk_buff *skb, +static bool tcp_fast_parse_options(const struct net *net, + const struct sk_buff *skb, const struct tcphdr *th, struct tcp_sock *tp) { /* In the spirit of fast parsing, compare doff directly to constant @@ -3867,7 +3872,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, return true; } - tcp_parse_options(skb, &tp->rx_opt, 1, NULL); + tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5019,7 +5024,7 @@ static void tcp_new_space(struct sock *sk) if (tcp_should_expand_sndbuf(sk)) { tcp_sndbuf_expand(sk); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } sk->sk_write_space(sk); @@ -5228,7 +5233,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, bool rst_seq_match = false; /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) && + tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5356,7 +5362,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - skb_mstamp_get(&tp->tcp_mstamp); + tcp_mstamp_refresh(tp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* @@ -5554,7 +5560,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; + icsk->icsk_ack.lrcvtime = tcp_jiffies32; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5571,7 +5577,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) /* Prevent spurious tcp_cwnd_restart() on first data * packet. 
*/ - tp->lsndtime = tcp_time_stamp; + tp->lsndtime = tcp_jiffies32; tcp_init_buffer_space(sk); @@ -5599,7 +5605,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(synack, &opt, 0, NULL); + tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL); mss = opt.mss_clamp; } @@ -5653,7 +5659,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; bool fastopen_fail; - tcp_parse_options(skb, &tp->rx_opt, 0, &foc); + tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5672,7 +5678,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, - tcp_time_stamp)) { + tcp_time_stamp(tp))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; @@ -5917,7 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_SYN_SENT: tp->rx_opt.saw_tstamp = 0; - skb_mstamp_get(&tp->tcp_mstamp); + tcp_mstamp_refresh(tp); queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; @@ -5929,7 +5935,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; } - skb_mstamp_get(&tp->tcp_mstamp); + tcp_mstamp_refresh(tp); tp->rx_opt.saw_tstamp = 0; req = tp->fastopen_rsk; if (req) { @@ -5948,13 +5954,17 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) /* step 5: check the ACK field */ acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; + FLAG_UPDATE_TS_RECENT | + FLAG_NO_CHALLENGE_ACK) > 0; + if (!acceptable) { + if (sk->sk_state == TCP_SYN_RECV) + return 1; /* send one RST */ + tcp_send_challenge_ack(sk, skb); + goto discard; + } switch (sk->sk_state) { case TCP_SYN_RECV: - if (!acceptable) - return 1; - if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); @@ -6008,7 +6018,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_update_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ - tp->lsndtime = tcp_time_stamp; + tp->lsndtime = tcp_jiffies32; tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); @@ -6023,14 +6033,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * our SYNACK so stop the SYNACK timer. */ if (req) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) - return 1; /* We no longer need the request sock. */ reqsk_fastopen_remove(sk, req, false); tcp_rearm_rto(sk); @@ -6202,7 +6204,7 @@ static void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - skb_mstamp_get(&tcp_rsk(req)->snt_synack); + tcp_rsk(req)->snt_synack = tcp_clock_us(); tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? 
rx_opt->rcv_tsval : 0; @@ -6330,7 +6332,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = af_ops->mss_clamp; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); + tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, + want_cookie ? NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -6348,7 +6351,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_free; if (tmp_opt.tstamp_ok) - tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb); + tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5ab2aac5ca19..1dc8c449e16a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -102,10 +102,9 @@ static u32 tcp_v4_init_seq(const struct sk_buff *skb) tcp_hdr(skb)->source); } -static u32 tcp_v4_init_ts_off(const struct sk_buff *skb) +static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcp_ts_off(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr); + return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -242,7 +241,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, usin->sin_port); - tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr, + tp->tsoffset = secure_tcp_ts_off(sock_net(sk), + inet->inet_saddr, inet->inet_daddr); } @@ -376,8 +376,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) struct sock *sk; struct sk_buff *skb; struct request_sock *fastopen; - __u32 seq, snd_una; - __u32 remaining; + u32 seq, snd_una; + s32 remaining; + u32 delta_us; int err; struct net *net = dev_net(icmp_skb->dev); @@ -483,11 +484,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) skb = tcp_write_queue_head(sk); BUG_ON(!skb); + tcp_mstamp_refresh(tp); + delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp); remaining = icsk->icsk_rto - - min(icsk->icsk_rto, - tcp_time_stamp - tcp_skb_timestamp(skb)); + usecs_to_jiffies(delta_us); - if (remaining) { + if (remaining > 0) { inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, TCP_RTO_MAX); } else { @@ -811,7 +813,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp + tcptw->tw_ts_offset, + tcp_time_stamp_raw() + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -839,7 +841,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp + tcp_rsk(req)->ts_off, + tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -2385,6 +2387,7 @@ struct proto tcp_prot = { .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, + .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, @@ -2463,6 +2466,9 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; 
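In the tcp_v4_err() hunk above, the time elapsed on the head skb now comes from the u64 microsecond stamps, and the leftover RTO is kept signed so expiry is detectable. A sketch of that re-arm decision under simplified types (stand-in names, not the kernel signatures):

#include <stdint.h>

/* Leftover RTO after part of it already elapsed. A signed remainder
 * distinguishes re-arming with the rest from an already-expired timer;
 * an unchecked unsigned difference would wrap to a huge bogus timeout.
 */
static uint32_t rto_remaining(uint32_t rto_jiffies, uint32_t elapsed_jiffies)
{
        int32_t remaining = (int32_t)(rto_jiffies - elapsed_jiffies);

        return remaining > 0 ? (uint32_t)remaining : 0; /* 0: fire now */
}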
net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256); + net->ipv4.sysctl_tcp_sack = 1; + net->ipv4.sysctl_tcp_window_scaling = 1; + net->ipv4.sysctl_tcp_timestamps = 1; return 0; fail: diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index d6fb6c067af4..ae10ed64fe13 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -37,7 +37,7 @@ #include <net/tcp.h> /* resolution of owd */ -#define LP_RESOL 1000 +#define LP_RESOL TCP_TS_HZ /** * enum tcp_lp_state @@ -147,9 +147,9 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) tp->rx_opt.rcv_tsecr == lp->local_ref_time) goto out; - m = HZ * (tp->rx_opt.rcv_tsval - - lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr - - lp->local_ref_time); + m = TCP_TS_HZ * + (tp->rx_opt.rcv_tsval - lp->remote_ref_time) / + (tp->rx_opt.rcv_tsecr - lp->local_ref_time); if (m < 0) m = -m; @@ -194,7 +194,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk) if (lp->flag & LP_VALID_RHZ) { owd = tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - - tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ); + tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ); if (owd < 0) owd = -owd; } @@ -264,18 +264,19 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); + u32 now = tcp_time_stamp(tp); u32 delta; if (sample->rtt_us > 0) tcp_lp_rtt_sample(sk, sample->rtt_us); /* calc inference */ - delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + delta = now - tp->rx_opt.rcv_tsecr; if ((s32)delta > 0) lp->inference = 3 * delta; /* test if within inference */ - if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference)) + if (lp->last_drop && (now - lp->last_drop < lp->inference)) lp->flag |= LP_WITHIN_INF; else lp->flag &= ~LP_WITHIN_INF; @@ -312,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); /* record this drop time */ - lp->last_drop = tcp_time_stamp; + lp->last_drop = now; } static struct tcp_congestion_ops tcp_lp __read_mostly = { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 653bbd67e3a3..102b2c90bb80 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -524,7 +524,7 @@ reset: tp->snd_cwnd = 1; else tp->snd_cwnd = tcp_init_cwnd(tp, dst); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 717be4de5324..d30ee31e94eb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -98,7 +98,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, 0, NULL); + tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { if (tmp_opt.rcv_tsecr) @@ -445,9 +445,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); + minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; - newicsk->icsk_ack.lrcvtime = tcp_time_stamp; + newicsk->icsk_ack.lrcvtime = tcp_jiffies32; newtp->packets_out = 0; newtp->retrans_out = 0; @@ -455,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 
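The tcp_sk_init() hunk above gives every network namespace its own copies of the SACK, window-scaling and timestamp knobs, initialized to the old global defaults so behaviour is unchanged. A pared-down sketch of the pattern (an illustrative struct, not the full netns_ipv4):

/* Per-namespace TCP option knobs; the field names follow the patch,
 * the enclosing struct is a stand-in. Defaults keep all options on.
 */
struct tcp_ns_knobs {
        int sysctl_tcp_sack;
        int sysctl_tcp_window_scaling;
        int sysctl_tcp_timestamps;
};

static void tcp_ns_knobs_init(struct tcp_ns_knobs *ns)
{
        ns->sysctl_tcp_sack = 1;
        ns->sysctl_tcp_window_scaling = 1;
        ns->sysctl_tcp_timestamps = 1;
}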
newtp->tlp_high_seq = 0; - newtp->lsndtime = treq->snt_synack.stamp_jiffies; + newtp->lsndtime = tcp_jiffies32; newsk->sk_txhash = treq->txhash; newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; @@ -526,7 +526,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; - newtp->rack.mstamp.v64 = 0; + newtp->rack.mstamp = 0; newtp->rack.advanced = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); @@ -559,7 +559,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 5de82a8d4d87..6d650ed3cb59 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -424,8 +424,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) } /* Extract info for Tcp socket info provided via netlink */ -size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, - union tcp_cc_info *info) +static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct tcpnv *ca = inet_csk_ca(sk); @@ -440,7 +440,6 @@ size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, } return 0; } -EXPORT_SYMBOL_GPL(tcpnv_get_info); static struct tcp_congestion_ops tcpnv __read_mostly = { .init = tcpnv_init, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4858e190f6ac..9a9c395b6235 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -151,7 +151,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_cwnd_used = 0; } @@ -160,7 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - const u32 now = tcp_time_stamp; + const u32 now = tcp_jiffies32; if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); @@ -569,18 +569,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && !*md5)) { + if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling)) { + if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack)) { + if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -904,6 +904,72 @@ out: sk_free(sk); } +/* Note: Called under hard irq. + * We can not call TCP stack right away. 
+ */ +enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer) +{ + struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer); + struct sock *sk = (struct sock *)tp; + unsigned long nval, oval; + + for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { + struct tsq_tasklet *tsq; + bool empty; + + if (oval & TSQF_QUEUED) + break; + + nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED; + nval = cmpxchg(&sk->sk_tsq_flags, oval, nval); + if (nval != oval) + continue; + + if (!atomic_inc_not_zero(&sk->sk_wmem_alloc)) + break; + /* queue this socket to tasklet queue */ + tsq = this_cpu_ptr(&tsq_tasklet); + empty = list_empty(&tsq->head); + list_add(&tp->tsq_node, &tsq->head); + if (empty) + tasklet_schedule(&tsq->tasklet); + break; + } + return HRTIMER_NORESTART; +} + +/* BBR congestion control needs pacing. + * Same remark for SO_MAX_PACING_RATE. + * sch_fq packet scheduler is efficiently handling pacing, + * but is not always installed/used. + * Return true if TCP stack should pace packets itself. + */ +static bool tcp_needs_internal_pacing(const struct sock *sk) +{ + return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED; +} + +static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) +{ + u64 len_ns; + u32 rate; + + if (!tcp_needs_internal_pacing(sk)) + return; + rate = sk->sk_pacing_rate; + if (!rate || rate == ~0U) + return; + + /* Should account for header sizes as sch_fq does, + * but lets make things simple. + */ + len_ns = (u64)skb->len * NSEC_PER_SEC; + do_div(len_ns, rate); + hrtimer_start(&tcp_sk(sk)->pacing_timer, + ktime_add_ns(ktime_get(), len_ns), + HRTIMER_MODE_ABS_PINNED); +} + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -931,8 +997,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, BUG_ON(!skb || !tcp_skb_pcount(skb)); tp = tcp_sk(sk); + skb->skb_mstamp = tp->tcp_mstamp; if (clone_it) { - skb_mstamp_get(&skb->skb_mstamp); TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; tcp_rate_skb_sent(sk, skb); @@ -1034,6 +1100,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); tp->data_segs_out += tcp_skb_pcount(skb); + tcp_internal_pacing(sk, skb); } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) @@ -1261,9 +1328,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, return 0; } -/* This is similar to __pskb_pull_head() (it will go to core/skbuff.c - * eventually). The difference is that pulled data not copied, but - * immediately discarded. +/* This is similar to __pskb_pull_tail(). The difference is that pulled + * data is not copied, but immediately discarded. 
*/ static int __pskb_trim_head(struct sk_buff *skb, int len) { @@ -1298,7 +1364,6 @@ static int __pskb_trim_head(struct sk_buff *skb, int len) } shinfo->nr_frags = k; - skb_reset_tail_pointer(skb); skb->data_len -= len; skb->len = skb->data_len; return len; @@ -1408,7 +1473,7 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } EXPORT_SYMBOL(tcp_mtup_init); @@ -1509,7 +1574,7 @@ static void tcp_cwnd_application_limited(struct sock *sk) } tp->snd_cwnd_used = 0; } - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) @@ -1530,14 +1595,14 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tcp_is_cwnd_limited(sk)) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } else { /* Network starves. */ if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; if (sysctl_tcp_slow_start_after_idle && - (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && + (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); @@ -1839,7 +1904,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, const struct inet_connection_sock *icsk = inet_csk(sk); u32 age, send_win, cong_win, limit, in_flight; struct tcp_sock *tp = tcp_sk(sk); - struct skb_mstamp now; struct sk_buff *head; int win_divisor; @@ -1852,7 +1916,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, /* Avoid bursty behavior by allowing defer * only if the last write was recent. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) > 0) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0) goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1895,8 +1959,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, } head = tcp_write_queue_head(sk); - skb_mstamp_get(&now); - age = skb_mstamp_us_delta(&now, &head->skb_mstamp); + + age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp); /* If next ACK is likely to come too late (half srtt), do not defer */ if (age < (tp->srtt_us >> 4)) goto send_now; @@ -1921,7 +1985,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) s32 delta; interval = net->ipv4.sysctl_tcp_probe_interval; - delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp; + delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); @@ -1933,7 +1997,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); /* Update probe time stamp */ - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } } @@ -2086,6 +2150,12 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } +static bool tcp_pacing_check(const struct sock *sk) +{ + return tcp_needs_internal_pacing(sk) && + hrtimer_active(&tcp_sk(sk)->pacing_timer); +} + /* TCP Small Queues : * Control number of packets in qdisc/devices to two packets / or ~1 ms. 
* (These limits are doubled for retransmits) @@ -2130,7 +2200,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) { - const u32 now = tcp_time_stamp; + const u32 now = tcp_jiffies32; if (tp->chrono_type > TCP_CHRONO_UNSPEC) tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start; @@ -2207,15 +2277,19 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } max_segs = tcp_tso_segs(sk, mss_now); + tcp_mstamp_refresh(tp); while ((skb = tcp_send_head(sk))) { unsigned int limit; + if (tcp_pacing_check(sk)) + break; + tso_segs = tcp_init_tso_segs(skb, mss_now); BUG_ON(!tso_segs); if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp" is used as a start point for the retransmit timer */ - skb_mstamp_get(&skb->skb_mstamp); + skb->skb_mstamp = tp->tcp_mstamp; goto repair; /* Skip network transmission */ } @@ -2342,10 +2416,10 @@ bool tcp_schedule_loss_probe(struct sock *sk) timeout = max_t(u32, timeout, msecs_to_jiffies(10)); /* If RTO is shorter, just schedule TLP in its place. */ - tlp_time_stamp = tcp_time_stamp + timeout; + tlp_time_stamp = tcp_jiffies32 + timeout; rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { - s32 delta = rto_time_stamp - tcp_time_stamp; + s32 delta = rto_time_stamp - tcp_jiffies32; if (delta > 0) timeout = delta; } @@ -2803,7 +2877,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb; - skb_mstamp_get(&skb->skb_mstamp); + skb->skb_mstamp = tp->tcp_mstamp; nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; @@ -2878,6 +2952,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == tcp_send_head(sk)) break; + + if (tcp_pacing_check(sk)) + break; + /* we could do better than to assign each time */ if (!hole) tp->retransmit_skb_hint = skb; @@ -3015,7 +3093,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) skb_reserve(skb, MAX_TCP_HEADER); tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), TCPHDR_ACK | TCPHDR_RST); - skb_mstamp_get(&skb->skb_mstamp); + tcp_mstamp_refresh(tcp_sk(sk)); /* Send it off. */ if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); @@ -3111,10 +3189,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) - skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req); + skb->skb_mstamp = cookie_init_timestamp(req); else #endif - skb_mstamp_get(&skb->skb_mstamp); + skb->skb_mstamp = tcp_clock_us(); #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); @@ -3193,8 +3271,9 @@ static void tcp_connect_init(struct sock *sk) /* We'll fix this up when we get a response from the other end. * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ - tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + tp->tcp_header_len = sizeof(struct tcphdr); + if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk)) @@ -3225,7 +3304,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? 
tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sysctl_tcp_window_scaling, + sock_net(sk)->ipv4.sysctl_tcp_window_scaling, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); @@ -3244,7 +3323,7 @@ static void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; else - tp->rcv_tstamp = tcp_time_stamp; + tp->rcv_tstamp = tcp_jiffies32; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; @@ -3373,7 +3452,8 @@ int tcp_connect(struct sock *sk) return -ENOBUFS; tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); - tp->retrans_stamp = tcp_time_stamp; + tcp_mstamp_refresh(tp); + tp->retrans_stamp = tcp_time_stamp(tp); tcp_connect_queue_skb(sk, buff); tcp_ecn_send_syn(sk, buff); @@ -3492,7 +3572,6 @@ void tcp_send_ack(struct sock *sk) skb_set_tcp_pure_ack(buff); /* Send it off, this clears delayed acks for us. */ - skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); } EXPORT_SYMBOL_GPL(tcp_send_ack); @@ -3526,15 +3605,16 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) * send it. */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); - skb_mstamp_get(&skb->skb_mstamp); NET_INC_STATS(sock_net(sk), mib); return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0); } +/* Called from setsockopt( ... TCP_REPAIR ) */ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; + tcp_mstamp_refresh(tcp_sk(sk)); tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE); } } diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index c6a9fa894646..ad99569d4c1e 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -78,7 +78,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - if (!scb->tx.delivered_mstamp.v64) + if (!scb->tx.delivered_mstamp) return; if (!rs->prior_delivered || @@ -89,9 +89,9 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, rs->is_retrans = scb->sacked & TCPCB_RETRANS; /* Find the duration of the "send phase" of this window: */ - rs->interval_us = skb_mstamp_us_delta( - &skb->skb_mstamp, - &scb->tx.first_tx_mstamp); + rs->interval_us = tcp_stamp_us_delta( + skb->skb_mstamp, + scb->tx.first_tx_mstamp); /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = skb->skb_mstamp; @@ -101,7 +101,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, * we don't need to reset since it'll be freed soon. */ if (scb->sacked & TCPCB_SACKED_ACKED) - scb->tx.delivered_mstamp.v64 = 0; + scb->tx.delivered_mstamp = 0; } /* Update the connection delivery information and generate a rate sample. */ @@ -125,7 +125,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp.v64) { + if (!rs->prior_mstamp) { rs->delivered = -1; rs->interval_us = -1; return; @@ -138,8 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. */ snd_us = rs->interval_us; /* send phase */ - ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp, - &rs->prior_mstamp); /* ack phase */ + ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, + rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); /* Normally we expect interval_us >= min-rtt. 
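tcp_rate_gen() above now derives both halves of the sampling interval from the same u64 microsecond clock. A sketch of the guard it implements, using plain stand-in types: a sample must span the longer of the send phase and the ack phase, so a compressed burst of ACKs cannot overstate the delivery rate.

#include <stdint.h>

/* Delivery-rate interval: max(send phase, ack phase), mirroring the
 * snd_us/ack_us computation in tcp_rate_gen().
 */
static int64_t rate_interval_us(uint64_t first_tx_us, uint64_t last_tx_us,
                                uint64_t prior_ack_us, uint64_t now_us)
{
        int64_t snd_us = (int64_t)(last_tx_us - first_tx_us);
        int64_t ack_us = (int64_t)(now_us - prior_ack_us);

        return snd_us > ack_us ? snd_us : ack_us;
}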
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 362b8c75bfab..fe9a493d0208 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -17,12 +17,9 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) } } -static bool tcp_rack_sent_after(const struct skb_mstamp *t1, - const struct skb_mstamp *t2, - u32 seq1, u32 seq2) +static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) { - return skb_mstamp_after(t1, t2) || - (t1->v64 == t2->v64 && after(seq1, seq2)); + return t1 > t2 || (t1 == t2 && after(seq1, seq2)); } /* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01): @@ -72,14 +69,14 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) scb->sacked & TCPCB_SACKED_ACKED) continue; - if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp, + if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, tp->rack.end_seq, scb->end_seq)) { /* Step 3 in draft-cheng-tcpm-rack-00.txt: * A packet is lost if its elapsed time is beyond * the recent RTT plus the reordering window. */ - u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp, - &skb->skb_mstamp); + u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp, + skb->skb_mstamp); s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed; if (remaining < 0) { @@ -127,16 +124,16 @@ void tcp_rack_mark_lost(struct sock *sk) * draft-cheng-tcpm-rack-00.txt */ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - const struct skb_mstamp *xmit_time) + u64 xmit_time) { u32 rtt_us; - if (tp->rack.mstamp.v64 && - !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp, + if (tp->rack.mstamp && + !tcp_rack_sent_after(xmit_time, tp->rack.mstamp, end_seq, tp->rack.end_seq)) return; - rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time); + rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); if (sacked & TCPCB_RETRANS) { /* If the sacked packet was retransmitted, it's ambiguous * whether the retransmission or the original (or the prior @@ -152,7 +149,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, return; } tp->rack.rtt_us = rtt_us; - tp->rack.mstamp = *xmit_time; + tp->rack.mstamp = xmit_time; tp->rack.end_seq = end_seq; tp->rack.advanced = 1; } @@ -166,7 +163,6 @@ void tcp_rack_reo_timeout(struct sock *sk) u32 timeout, prior_inflight; prior_inflight = tcp_packets_in_flight(tp); - skb_mstamp_get(&tp->tcp_mstamp); tcp_rack_detect_loss(sk, &timeout); if (prior_inflight != tcp_packets_in_flight(tp)) { if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 14672543cf0b..c0feeeef962a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -63,7 +63,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ @@ -73,7 +73,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) if (tcp_check_oom(sk, shift)) { /* Catch exceptional cases, when connection requires reset. * 1. Last segment was sent recently. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || + if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed. 
*/ (!tp->snd_wnd && !tp->packets_out)) do_reset = true; @@ -115,7 +115,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct net *net = sock_net(sk); @@ -139,22 +139,18 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) * @timeout: A custom timeout value. * If set to 0 the default timeout is calculated and used. * Using TCP_RTO_MIN and the number of unsuccessful retransmits. - * @syn_set: true if the SYN Bit was set. * * The default "timeout" value this function can calculate and use * is equivalent to the timeout of a TCP Connection * after "boundary" unsuccessful, exponentially backed-off - * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if - * syn_set flag is set. - * + * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, unsigned int boundary, - unsigned int timeout, - bool syn_set) + unsigned int timeout) { + const unsigned int rto_base = TCP_RTO_MIN; unsigned int linear_backoff_thresh, start_ts; - unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; if (!inet_csk(sk)->icsk_retransmits) return false; @@ -172,7 +168,7 @@ static bool retransmits_timed_out(struct sock *sk, timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; } - return (tcp_time_stamp - start_ts) >= timeout; + return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= jiffies_to_msecs(timeout); } /* A write timeout has occurred. Process the after effects. */ @@ -181,8 +177,8 @@ static int tcp_write_timeout(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); + bool expired, do_reset; int retry_until; - bool do_reset, syn_set = false; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) { @@ -196,9 +192,9 @@ static int tcp_write_timeout(struct sock *sk) sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; - syn_set = true; + expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) { + if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { /* Some middle-boxes may black-hole Fast Open _after_ * the handshake. Therefore we conservatively disable * Fast Open on this path on recurring timeouts after @@ -224,15 +220,15 @@ static int tcp_write_timeout(struct sock *sk) retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || - !retransmits_timed_out(sk, retry_until, 0, 0); + !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } + expired = retransmits_timed_out(sk, retry_until, + icsk->icsk_user_timeout); } - - if (retransmits_timed_out(sk, retry_until, - syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) { + if (expired) { /* Has it gone just too far? 
*/ tcp_write_err(sk); return 1; @@ -339,9 +335,10 @@ static void tcp_probe_timer(struct sock *sk) */ start_ts = tcp_skb_timestamp(tcp_send_head(sk)); if (!start_ts) - skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp); + tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp; else if (icsk->icsk_user_timeout && - (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout) + (s32)(tcp_time_stamp(tp) - start_ts) > + jiffies_to_msecs(icsk->icsk_user_timeout)) goto abort; max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; @@ -451,7 +448,7 @@ void tcp_retransmit_timer(struct sock *sk) tp->snd_una, tp->snd_nxt); } #endif - if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { + if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) { tcp_write_err(sk); goto out; } @@ -539,7 +536,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0)) + if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) __sk_dst_reset(sk); out:; @@ -561,6 +558,7 @@ void tcp_write_timer_handler(struct sock *sk) goto out; } + tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; switch (event) { @@ -710,4 +708,7 @@ void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); + hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED); + tcp_sk(sk)->pacing_timer.function = tcp_pace_kick; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 9775453b8d17..bec9cafbe3f9 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -68,7 +68,7 @@ static void tcp_westwood_init(struct sock *sk) w->cumul_ack = 0; w->reset_rtt_min = 1; w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; - w->rtt_win_sx = tcp_time_stamp; + w->rtt_win_sx = tcp_jiffies32; w->snd_una = tcp_sk(sk)->snd_una; w->first_ack = 1; } @@ -116,7 +116,7 @@ static void tcp_westwood_pkts_acked(struct sock *sk, static void westwood_update_window(struct sock *sk) { struct westwood *w = inet_csk_ca(sk); - s32 delta = tcp_time_stamp - w->rtt_win_sx; + s32 delta = tcp_jiffies32 - w->rtt_win_sx; /* Initialize w->snd_una with the first acked sequence number in order * to fix mismatch between tp->snd_una and w->snd_una for the first @@ -140,7 +140,7 @@ static void westwood_update_window(struct sock *sk) westwood_filter(w, delta); w->bk = 0; - w->rtt_win_sx = tcp_time_stamp; + w->rtt_win_sx = tcp_jiffies32; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1d6219bf2d6b..2bc638c48b86 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1163,23 +1163,110 @@ out: return ret; } +/* Copy as much information as possible into skb->dev_scratch to avoid + * possibly multiple cache miss on dequeue(); + */ +#if BITS_PER_LONG == 64 + +/* we can store multiple info here: truesize, len and the bit needed to + * compute skb_csum_unnecessary will be on cold cache lines at recvmsg + * time. + * skb->len can be stored on 16 bits since the udp header has been already + * validated and pulled. 
+ */ +struct udp_dev_scratch { + u32 truesize; + u16 len; + bool is_linear; + bool csum_unnecessary; +}; + +static void udp_set_dev_scratch(struct sk_buff *skb) +{ + struct udp_dev_scratch *scratch; + + BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); + scratch = (struct udp_dev_scratch *)&skb->dev_scratch; + scratch->truesize = skb->truesize; + scratch->len = skb->len; + scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); + scratch->is_linear = !skb_is_nonlinear(skb); +} + +static int udp_skb_truesize(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize; +} + +static unsigned int udp_skb_len(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; +} + +static bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; +} + +static bool udp_skb_is_linear(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; +} + +#else +static void udp_set_dev_scratch(struct sk_buff *skb) +{ + skb->dev_scratch = skb->truesize; +} + +static int udp_skb_truesize(struct sk_buff *skb) +{ + return skb->dev_scratch; +} + +static unsigned int udp_skb_len(struct sk_buff *skb) +{ + return skb->len; +} + +static bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return skb_csum_unnecessary(skb); +} + +static bool udp_skb_is_linear(struct sk_buff *skb) +{ + return !skb_is_nonlinear(skb); +} +#endif + /* fully reclaim rmem/fwd memory allocated for skb */ -static void udp_rmem_release(struct sock *sk, int size, int partial) +static void udp_rmem_release(struct sock *sk, int size, int partial, + bool rx_queue_lock_held) { struct udp_sock *up = udp_sk(sk); + struct sk_buff_head *sk_queue; int amt; if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; if (size < (sk->sk_rcvbuf >> 2) && - !skb_queue_empty(&sk->sk_receive_queue)) + !skb_queue_empty(&up->reader_queue)) return; } else { size += up->forward_deficit; } up->forward_deficit = 0; + /* acquire the sk_receive_queue for fwd allocated memory scheduling, + * if the called don't held it already + */ + sk_queue = &sk->sk_receive_queue; + if (!rx_queue_lock_held) + spin_lock(&sk_queue->lock); + + sk->sk_forward_alloc += size; amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); sk->sk_forward_alloc -= amt; @@ -1188,19 +1275,33 @@ static void udp_rmem_release(struct sock *sk, int size, int partial) __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); atomic_sub(size, &sk->sk_rmem_alloc); + + /* this can save us from acquiring the rx queue lock on next receive */ + skb_queue_splice_tail_init(sk_queue, &up->reader_queue); + + if (!rx_queue_lock_held) + spin_unlock(&sk_queue->lock); } -/* Note: called with sk_receive_queue.lock held. +/* Note: called with reader_queue.lock held. * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch * This avoids a cache line miss while receive_queue lock is held. * Look at __udp_enqueue_schedule_skb() to find where this copy is done. 
*/ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) { - udp_rmem_release(sk, skb->dev_scratch, 1); + prefetch(&skb->data); + udp_rmem_release(sk, udp_skb_truesize(skb), 1, false); } EXPORT_SYMBOL(udp_skb_destructor); +/* as above, but the caller held the rx queue lock, too */ +static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb) +{ + prefetch(&skb->data); + udp_rmem_release(sk, udp_skb_truesize(skb), 1, true); +} + /* Idea of busylocks is to let producers grab an extra spinlock * to relieve pressure on the receive_queue spinlock shared by consumer. * Under flood, this means that only one producer can be in line @@ -1252,10 +1353,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) busy = busylock_acquire(sk); } size = skb->truesize; - /* Copy skb->truesize into skb->dev_scratch to avoid a cache line miss - * in udp_skb_destructor() - */ - skb->dev_scratch = size; + udp_set_dev_scratch(skb); /* we drop only if the receive buf is full and the receive * queue contains some other skb @@ -1306,14 +1404,16 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); void udp_destruct_sock(struct sock *sk) { /* reclaim completely the forward allocated memory */ + struct udp_sock *up = udp_sk(sk); unsigned int total = 0; struct sk_buff *skb; - while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue); + while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) { total += skb->truesize; kfree_skb(skb); } - udp_rmem_release(sk, total, 0); + udp_rmem_release(sk, total, 0, true); inet_sock_destruct(sk); } @@ -1321,6 +1421,7 @@ EXPORT_SYMBOL_GPL(udp_destruct_sock); int udp_init_sock(struct sock *sk) { + skb_queue_head_init(&udp_sk(sk)->reader_queue); sk->sk_destruct = udp_destruct_sock; return 0; } @@ -1334,10 +1435,31 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) sk_peek_offset_bwd(sk, len); unlock_sock_fast(sk, slow); } - consume_skb(skb); + + consume_stateless_skb(skb); } EXPORT_SYMBOL_GPL(skb_consume_udp); +static struct sk_buff *__first_packet_length(struct sock *sk, + struct sk_buff_head *rcvq, + int *total) +{ + struct sk_buff *skb; + + while ((skb = skb_peek(rcvq)) != NULL && + udp_lib_checksum_complete(skb)) { + __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, + IS_UDPLITE(sk)); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + atomic_inc(&sk->sk_drops); + __skb_unlink(skb, rcvq); + *total += skb->truesize; + kfree_skb(skb); + } + return skb; +} + /** * first_packet_length - return length of first packet in receive queue * @sk: socket @@ -1347,26 +1469,24 @@ EXPORT_SYMBOL_GPL(skb_consume_udp); */ static int first_packet_length(struct sock *sk) { - struct sk_buff_head *rcvq = &sk->sk_receive_queue; + struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue; + struct sk_buff_head *sk_queue = &sk->sk_receive_queue; struct sk_buff *skb; int total = 0; int res; spin_lock_bh(&rcvq->lock); - while ((skb = skb_peek(rcvq)) != NULL && - udp_lib_checksum_complete(skb)) { - __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, - IS_UDPLITE(sk)); - __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - atomic_inc(&sk->sk_drops); - __skb_unlink(skb, rcvq); - total += skb->truesize; - kfree_skb(skb); + skb = __first_packet_length(sk, rcvq, &total); + if (!skb && !skb_queue_empty(sk_queue)) { + spin_lock(&sk_queue->lock); + skb_queue_splice_tail_init(sk_queue, rcvq); + spin_unlock(&sk_queue->lock); + + skb = __first_packet_length(sk, rcvq, 
&total); } res = skb ? skb->len : -1; if (total) - udp_rmem_release(sk, total, 1); + udp_rmem_release(sk, total, 1, false); spin_unlock_bh(&rcvq->lock); return res; } @@ -1400,6 +1520,89 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) } EXPORT_SYMBOL(udp_ioctl); +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *peeked, int *off, int *err) +{ + struct sk_buff_head *sk_queue = &sk->sk_receive_queue; + struct sk_buff_head *queue; + struct sk_buff *last; + long timeo; + int error; + + queue = &udp_sk(sk)->reader_queue; + flags |= noblock ? MSG_DONTWAIT : 0; + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + do { + struct sk_buff *skb; + + error = sock_error(sk); + if (error) + break; + + error = -EAGAIN; + *peeked = 0; + do { + spin_lock_bh(&queue->lock); + skb = __skb_try_recv_from_queue(sk, queue, flags, + udp_skb_destructor, + peeked, off, err, + &last); + if (skb) { + spin_unlock_bh(&queue->lock); + return skb; + } + + if (skb_queue_empty(sk_queue)) { + spin_unlock_bh(&queue->lock); + goto busy_check; + } + + /* refill the reader queue and walk it again + * keep both queues locked to avoid re-acquiring + * the sk_receive_queue lock if fwd memory scheduling + * is needed. + */ + spin_lock(&sk_queue->lock); + skb_queue_splice_tail_init(sk_queue, queue); + + skb = __skb_try_recv_from_queue(sk, queue, flags, + udp_skb_dtor_locked, + peeked, off, err, + &last); + spin_unlock(&sk_queue->lock); + spin_unlock_bh(&queue->lock); + if (skb) + return skb; + +busy_check: + if (!sk_can_busy_loop(sk)) + break; + + sk_busy_loop(sk, flags & MSG_DONTWAIT); + } while (!skb_queue_empty(sk_queue)); + + /* sk_queue is empty, reader_queue may contain peeked packets */ + } while (timeo && + !__skb_wait_for_more_packets(sk, &error, &timeo, + (struct sk_buff *)sk_queue)); + + *err = error; + return NULL; +} +EXPORT_SYMBOL_GPL(__skb_recv_udp); + +static int copy_linear_skb(struct sk_buff *skb, int len, int off, + struct iov_iter *to) +{ + int n, copy = len - off; + + n = copy_to_iter(skb->data + off, copy, to); + if (n == copy) + return 0; + + return -EFAULT; +} + /* * This should be easy, if there is something there we * return it, otherwise we block. 
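The UDP hunks above split reception across two queues: producers append to sk_receive_queue under its lock, while recvmsg() drains the socket-private reader_queue and only takes the shared lock to splice over more skbs once its own queue runs dry. A self-contained sketch of that pattern with pthread primitives (generic stand-ins, not the kernel's skb queues):

#include <pthread.h>
#include <stddef.h>

struct node { struct node *next; };

struct two_q {
        pthread_mutex_t shared_lock;         /* producers contend here */
        struct node *shared_head;            /* sk_receive_queue analogue */
        struct node *reader_head;            /* consumer-private queue */
};

static struct node *pop(struct node **head)
{
        struct node *n = *head;

        if (n)
                *head = n->next;
        return n;
}

static struct node *two_q_recv(struct two_q *q)
{
        struct node *n = pop(&q->reader_head);

        if (n)
                return n;                    /* fast path: no shared lock */

        pthread_mutex_lock(&q->shared_lock);
        q->reader_head = q->shared_head;     /* splice everything over */
        q->shared_head = NULL;
        pthread_mutex_unlock(&q->shared_lock);

        return pop(&q->reader_head);         /* NULL: caller must wait */
}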
@@ -1426,7 +1629,7 @@ try_again: if (!skb) return err; - ulen = skb->len; + ulen = udp_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; @@ -1441,14 +1644,18 @@ try_again: if (copied < ulen || peeking || (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { - checksum_valid = !udp_lib_checksum_complete(skb); + checksum_valid = udp_skb_csum_unnecessary(skb) || + !__udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } - if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, off, msg, copied); - else { + if (checksum_valid || udp_skb_csum_unnecessary(skb)) { + if (udp_skb_is_linear(skb)) + err = copy_linear_skb(skb, copied, off, &msg->msg_iter); + else + err = skb_copy_datagram_msg(skb, off, msg, copied); + } else { err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) @@ -1490,7 +1697,8 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { + if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, + udp_skb_destructor)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } @@ -1624,6 +1832,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } + /* clear all pending head states while they are hot in the cache */ + skb_release_head_state(skb); + rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -2325,6 +2536,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; + if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) + mask |= POLLIN | POLLRDNORM; + sock_rps_record_flow(sk); /* Check for false positives due to checksum errors */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6a4fb1e629fb..0aa36b093013 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -963,6 +963,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; + struct in6_validator_info i6vi; unsigned int hash; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -974,6 +975,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, return ERR_PTR(-EADDRNOTAVAIL); rcu_read_lock_bh(); + + in6_dev_hold(idev); + if (idev->dead) { err = -ENODEV; /*XXX*/ goto out2; @@ -984,6 +988,17 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out2; } + i6vi.i6vi_addr = *addr; + i6vi.i6vi_dev = idev; + rcu_read_unlock_bh(); + + err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); + + rcu_read_lock_bh(); + err = notifier_to_errno(err); + if (err) + goto out2; + spin_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ @@ -1034,7 +1049,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->rt = rt; ifa->idev = idev; - in6_dev_hold(idev); /* For caller */ in6_ifa_hold(ifa); @@ -1062,6 +1076,7 @@ out2: inet6addr_notifier_call_chain(NETDEV_UP, ifa); else { kfree(ifa); + in6_dev_put(idev); ifa = ERR_PTR(err); } @@ -2280,7 +2295,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, cfg.fc_flags |= RTF_NONEXTHOP; #endif - ip6_route_add(&cfg); + ip6_route_add(&cfg, NULL); } @@ -2335,7 +2350,7 @@ static void addrconf_add_mroute(struct net_device *dev) ipv6_addr_set(&cfg.fc_dst, 
htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg); + ip6_route_add(&cfg, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index bfa941fc1165..9e3488d50b15 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -88,6 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr) EXPORT_SYMBOL(__ipv6_addr_type); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); +static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain); int register_inet6addr_notifier(struct notifier_block *nb) { @@ -107,6 +108,24 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v) } EXPORT_SYMBOL(inet6addr_notifier_call_chain); +int register_inet6addr_validator_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&inet6addr_validator_chain, nb); +} +EXPORT_SYMBOL(register_inet6addr_validator_notifier); + +int unregister_inet6addr_validator_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb); +} +EXPORT_SYMBOL(unregister_inet6addr_validator_notifier); + +int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v); +} +EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); + static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1, struct dst_entry **u2, struct flowi6 *u3) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index dda6035e3b84..755f38271dd5 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -423,7 +423,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -606,7 +608,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->hop_limit = 0; sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1fe99ba8066c..2ede4e459c4e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -346,9 +346,11 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi); sg_init_table(sg, esp->nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + esp->clen + alen); + err = skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + if (unlikely(err < 0)) + goto error; if (!esp->inplace) { int allocsize; @@ -372,9 +374,11 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info spin_unlock_bh(&x->lock); sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); - skb_to_sgvec(skb, dsg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + esp->clen + alen); + err = skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + if (unlikely(err < 0)) + goto error; } if ((x->props.flags & XFRM_STATE_ESN)) @@ -618,7 +622,9 @@ skip_cow: esp_input_set_header(skb, seqhi); sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, 
skb->len); + ret = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(ret < 0)) + goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 9ea249b9451e..6de3c04b0f30 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -14,6 +14,8 @@ #include <net/udp.h> #include <net/udp_tunnel.h> +#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL) + static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, struct flowi6 *fl6, u8 *protocol, __be16 sport) { @@ -33,8 +35,8 @@ static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, *protocol = IPPROTO_UDP; } -int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi6 *fl6) +static int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) { __be16 sport; int err; @@ -49,10 +51,9 @@ int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return 0; } -EXPORT_SYMBOL(fou6_build_header); -int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi6 *fl6) +static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) { __be16 sport; int err; @@ -67,9 +68,6 @@ int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return 0; } -EXPORT_SYMBOL(gue6_build_header); - -#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL) static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { .encap_hlen = fou_encap_hlen, diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index b3df03e3faa0..0c02a09bc351 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -91,7 +91,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) drop: kfree_skb(skb); - return -EINVAL; + return err; } static int ila_input(struct sk_buff *skb) @@ -117,7 +117,8 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { static int ila_build_state(struct nlattr *nla, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct ila_lwt *ilwt; struct ila_params *p; @@ -146,7 +147,7 @@ static int ila_build_state(struct nlattr *nla, return -EINVAL; } - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL); + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); if (ret < 0) return ret; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d4bf2c68a545..deea901746c8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -473,7 +473,8 @@ out: static struct fib6_node *fib6_add_1(struct fib6_node *root, struct in6_addr *addr, int plen, int offset, int allow_create, - int replace_required, int sernum) + int replace_required, int sernum, + struct netlink_ext_ack *extack) { struct fib6_node *fn, *in, *ln; struct fib6_node *pn = NULL; @@ -497,6 +498,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { if (!allow_create) { if (replace_required) { + NL_SET_ERR_MSG(extack, + "Can not replace route - no match found"); pr_warn("Can't replace route, no match found\n"); return ERR_PTR(-ENOENT); } @@ -543,6 +546,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, * That would keep IPv6 consistent with IPv4 */ if (replace_required) { + NL_SET_ERR_MSG(extack, + "Can not replace route - no match found"); pr_warn("Can't replace route, no match found\n"); return ERR_PTR(-ENOENT); } @@ -964,7 +969,8 @@ void fib6_force_start_gc(struct net 
*net) */ int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc) + struct nl_info *info, struct mx6_config *mxc, + struct netlink_ext_ack *extack) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -987,7 +993,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), allow_create, - replace_required, sernum); + replace_required, sernum, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); fn = NULL; @@ -1028,7 +1034,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, sn = fib6_add_1(sfn, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required, sernum); + allow_create, replace_required, sernum, + extack); if (IS_ERR(sn)) { /* If it is failed, discard just allocated @@ -1047,7 +1054,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required, sernum); + allow_create, replace_required, sernum, + extack); if (IS_ERR(sn)) { err = PTR_ERR(sn); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bf8a58a1c32d..0d6f3b6345de 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -67,9 +67,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * struct in6_addr *nexthop; int ret; - skb->protocol = htons(ETH_P_IPV6); - skb->dev = dev; - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -154,6 +151,9 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + if (unlikely(idev->cnf.disable_ipv6)) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -869,7 +869,6 @@ fail_toobig: if (skb->sk && dst_allfrag(skb_dst(skb))) sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); - skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index eedee5d108d9..f63b18e05c69 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -220,9 +220,6 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) __be16 fo; u8 proto; - if (skb->csum_bad) - return false; - if (skb_csum_unnecessary(skb)) return true; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7cebd954d5bb..18fe6e2b88d5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -938,14 +938,15 @@ EXPORT_SYMBOL(rt6_lookup); */ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct mx6_config *mxc) + struct mx6_config *mxc, + struct netlink_ext_ack *extack) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mxc); + err = fib6_add(&table->tb6_root, rt, info, mxc, extack); write_unlock_bh(&table->tb6_lock); return err; @@ -956,7 +957,7 @@ int ip6_ins_rt(struct rt6_info *rt) struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; struct mx6_config mxc = { .mx = NULL, }; - return __ip6_ins_rt(rt, &info, &mxc); + return __ip6_ins_rt(rt, &info, &mxc, NULL); } static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, @@ -1844,7 +1845,8 @@ static struct 
rt6_info *ip6_nh_lookup_table(struct net *net, return rt; } -static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) +static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; struct rt6_info *rt = NULL; @@ -1855,14 +1857,25 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) int err = -EINVAL; /* RTF_PCPU is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_PCPU) + if (cfg->fc_flags & RTF_PCPU) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); goto out; + } - if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) + if (cfg->fc_dst_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + goto out; + } + if (cfg->fc_src_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid source address length"); goto out; + } #ifndef CONFIG_IPV6_SUBTREES - if (cfg->fc_src_len) + if (cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, + "Specifying source address requires IPV6_SUBTREES to be enabled"); goto out; + } #endif if (cfg->fc_ifindex) { err = -ENODEV; @@ -1926,7 +1939,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) err = lwtunnel_build_state(cfg->fc_encap_type, cfg->fc_encap, AF_INET6, cfg, - &lwtstate); + &lwtstate, extack); if (err) goto out; rt->dst.lwtstate = lwtstate_get(lwtstate); @@ -2013,9 +2026,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) err = -EINVAL; if (ipv6_chk_addr_and_flags(net, gw_addr, gwa_type & IPV6_ADDR_LINKLOCAL ? - dev : NULL, 0, 0)) + dev : NULL, 0, 0)) { + NL_SET_ERR_MSG(extack, "Invalid gateway address"); goto out; - + } rt->rt6i_gateway = *gw_addr; if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { @@ -2031,8 +2045,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) addressing */ if (!(gwa_type & (IPV6_ADDR_UNICAST | - IPV6_ADDR_MAPPED))) + IPV6_ADDR_MAPPED))) { + NL_SET_ERR_MSG(extack, + "Invalid gateway address"); goto out; + } if (cfg->fc_table) { grt = ip6_nh_lookup_table(net, cfg, gw_addr); @@ -2072,8 +2089,14 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) goto out; } err = -EINVAL; - if (!dev || (dev->flags & IFF_LOOPBACK)) + if (!dev) { + NL_SET_ERR_MSG(extack, "Egress device not specified"); + goto out; + } else if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, + "Egress device can not be loopback device for this route"); goto out; + } } err = -ENODEV; @@ -2082,6 +2105,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (!ipv6_addr_any(&cfg->fc_prefsrc)) { if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { + NL_SET_ERR_MSG(extack, "Invalid source address"); err = -EINVAL; goto out; } @@ -2111,13 +2135,14 @@ out: return ERR_PTR(err); } -int ip6_route_add(struct fib6_config *cfg) +int ip6_route_add(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct mx6_config mxc = { .mx = NULL, }; struct rt6_info *rt; int err; - rt = ip6_route_info_create(cfg); + rt = ip6_route_info_create(cfg, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; @@ -2128,7 +2153,7 @@ int ip6_route_add(struct fib6_config *cfg) if (err) goto out; - err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack); kfree(mxc.mx); @@ -2222,7 +2247,8 @@ out_put: return err; } -static int ip6_route_del(struct fib6_config *cfg) +static int ip6_route_del(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct fib6_table *table; 
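
Most of the ipv6/route.c churn above is mechanical: every validation path gains a struct netlink_ext_ack *extack parameter so NL_SET_ERR_MSG() can hand a human-readable reason back to the netlink caller instead of a bare -EINVAL. A toy rendering of the pattern follows, with a deliberately simplified stand-in for the extack structure and macro (the real ones live in include/linux/netlink.h and also carry the offending attribute and offset):

    #include <errno.h>
    #include <stdio.h>

    /* Simplified stand-in for struct netlink_ext_ack: one message slot. */
    struct ext_ack {
        const char *msg;
    };

    #define SET_ERR_MSG(ack, m)   \
        do {                      \
            if (ack)              \
                (ack)->msg = (m); \
        } while (0)

    /* Validation in the style of ip6_route_info_create(): each failure
     * path names its reason instead of returning a bare -EINVAL. */
    static int check_prefix_len(int dst_len, int src_len, struct ext_ack *ack)
    {
        if (dst_len > 128) {
            SET_ERR_MSG(ack, "Invalid prefix length");
            return -EINVAL;
        }
        if (src_len > 128) {
            SET_ERR_MSG(ack, "Invalid source address length");
            return -EINVAL;
        }
        return 0;
    }

    int main(void)
    {
        struct ext_ack ack = { NULL };

        if (check_prefix_len(129, 0, &ack))
            fprintf(stderr, "error: %s\n", ack.msg); /* Invalid prefix length */
        return 0;
    }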
struct fib6_node *fn; @@ -2230,8 +2256,10 @@ static int ip6_route_del(struct fib6_config *cfg) int err = -ESRCH; table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); - if (!table) + if (!table) { + NL_SET_ERR_MSG(extack, "FIB table does not exist"); return err; + } read_lock_bh(&table->tb6_lock); @@ -2483,7 +2511,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, if (!prefixlen) cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&cfg); + ip6_route_add(&cfg, NULL); return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } @@ -2529,7 +2557,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, cfg.fc_gateway = *gwaddr; - if (!ip6_route_add(&cfg)) { + if (!ip6_route_add(&cfg, NULL)) { struct fib6_table *table; table = fib6_get_table(dev_net(dev), cfg.fc_table); @@ -2622,10 +2650,10 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&cfg); + err = ip6_route_add(&cfg, NULL); break; case SIOCDELRT: - err = ip6_route_del(&cfg); + err = ip6_route_del(&cfg, NULL); break; default: err = -EINVAL; @@ -2904,7 +2932,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, - struct fib6_config *cfg) + struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; @@ -2988,7 +3017,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, - cfg->fc_mp_len); + cfg->fc_mp_len, extack); if (err < 0) goto errout; } @@ -3007,7 +3036,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); - err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); if (err < 0) goto errout; } @@ -3098,7 +3127,8 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); } -static int ip6_route_multipath_add(struct fib6_config *cfg) +static int ip6_route_multipath_add(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct rt6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; @@ -3146,7 +3176,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) r_cfg.fc_encap_type = nla_get_u16(nla); } - rt = ip6_route_info_create(&r_cfg); + rt = ip6_route_info_create(&r_cfg, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; @@ -3171,7 +3201,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { rt_last = nh->rt6_info; - err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc); + err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); /* save reference to first route for notification */ if (!rt_notif && !err) rt_notif = nh->rt6_info; @@ -3213,7 +3243,7 @@ add_errout: list_for_each_entry(nh, &rt6_nh_list, next) { if (err_nh == nh) break; - ip6_route_del(&nh->r_cfg); + ip6_route_del(&nh->r_cfg, extack); } cleanup: @@ -3228,7 +3258,8 @@ cleanup: return err; } -static int ip6_route_multipath_del(struct fib6_config *cfg) +static int ip6_route_multipath_del(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct fib6_config r_cfg; struct rtnexthop *rtnh; @@ -3255,7 +3286,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg) 
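
ip6_route_multipath_add() above keeps its all-or-nothing semantics through the conversion: nexthops are first built into rt6_nh_list, then inserted one by one, and on failure the add_errout path walks the list again, deleting everything before the err_nh cursor. A generic sketch of that rollback shape (the commit/rollback helpers here are hypothetical, not kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    static bool commit(int id)   { return id != 3; /* pretend id 3 fails */ }
    static void rollback(int id) { printf("rollback %d\n", id); }

    /* All-or-nothing insertion over a batch, in the spirit of
     * ip6_route_multipath_add(): on the first failure, undo exactly the
     * entries already committed, then report the error. */
    int insert_all(const int *ids, int n)
    {
        int i, err = 0;

        for (i = 0; i < n; i++) {
            if (!commit(ids[i])) {
                err = -1;
                break;
            }
        }
        if (err) {
            /* walk the batch again, stopping at the entry that failed,
             * like the err_nh cursor in the hunk above */
            for (int j = 0; j < i; j++)
                rollback(ids[j]);
        }
        return err;
    }

    int main(void)
    {
        int ids[] = { 1, 2, 3, 4 };
        return insert_all(ids, 4) ? 1 : 0;
    }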
r_cfg.fc_flags |= RTF_GATEWAY; } } - err = ip6_route_del(&r_cfg); + err = ip6_route_del(&r_cfg, extack); if (err) last_err = err; @@ -3271,15 +3302,15 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib6_config cfg; int err; - err = rtm_to_fib6_config(skb, nlh, &cfg); + err = rtm_to_fib6_config(skb, nlh, &cfg, extack); if (err < 0) return err; if (cfg.fc_mp) - return ip6_route_multipath_del(&cfg); + return ip6_route_multipath_del(&cfg, extack); else { cfg.fc_delete_all_nh = 1; - return ip6_route_del(&cfg); + return ip6_route_del(&cfg, extack); } } @@ -3289,14 +3320,14 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib6_config cfg; int err; - err = rtm_to_fib6_config(skb, nlh, &cfg); + err = rtm_to_fib6_config(skb, nlh, &cfg, extack); if (err < 0) return err; if (cfg.fc_mp) - return ip6_route_multipath_add(&cfg); + return ip6_route_multipath_add(&cfg, extack); else - return ip6_route_add(&cfg); + return ip6_route_add(&cfg, extack); } static size_t rt6_nlmsg_size(struct rt6_info *rt) @@ -3577,11 +3608,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; + int err, iif = 0, oif = 0; + struct dst_entry *dst; struct rt6_info *rt; struct sk_buff *skb; struct rtmsg *rtm; struct flowi6 fl6; - int err, iif = 0, oif = 0; + bool fibmatch; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, extack); @@ -3592,6 +3625,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, memset(&fl6, 0, sizeof(fl6)); rtm = nlmsg_data(nlh); fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); + fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) @@ -3637,12 +3671,23 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!ipv6_addr_any(&fl6.saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, - flags); + if (!fibmatch) + dst = ip6_route_input_lookup(net, dev, &fl6, flags); } else { fl6.flowi6_oif = oif; - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); + if (!fibmatch) + dst = ip6_route_output(net, NULL, &fl6); + } + + if (fibmatch) + dst = ip6_route_lookup(net, &fl6, 0); + + rt = container_of(dst, struct rt6_info, dst); + if (rt->dst.error) { + err = rt->dst.error; + ip6_rt_put(rt); + goto errout; } if (rt == net->ipv6.ip6_null_entry) { @@ -3659,10 +3704,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } skb_dst_set(skb, &rt->dst); - - err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + if (fibmatch) + err = rt6_fill_node(net, skb, rt, NULL, NULL, iif, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + else + err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); if (err < 0) { kfree_skb(skb); goto errout; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 5f44ffed2576..15fba55e3da8 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -303,13 +303,9 @@ static int seg6_genl_dumphmac_done(struct netlink_callback *cb) static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) { struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; - struct net *net = sock_net(skb->sk); - struct seg6_pernet_data *sdata; struct 
seg6_hmac_info *hinfo; int ret; - sdata = seg6_pernet(net); - ret = rhashtable_walk_start(iter); if (ret && ret != -EAGAIN) goto done; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 6a495490d43e..264d772d3c7d 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -326,7 +326,8 @@ drop: static int seg6_build_state(struct nlattr *nla, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; struct seg6_iptunnel_encap *tuninfo; @@ -336,7 +337,7 @@ static int seg6_build_state(struct nlattr *nla, int err; err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, - seg6_iptunnel_policy, NULL); + seg6_iptunnel_policy, extack); if (err < 0) return err; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 5abc3692b901..2f7e99af67db 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -162,15 +162,16 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + tsoff = secure_tcpv6_ts_off(sock_net(sk), + ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(&tcp_opt)) + if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) goto out; ret = NULL; @@ -211,7 +212,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? 
tcp_opt.rcv_tsval : 0; - treq->snt_synack.v64 = 0; + treq->snt_synack = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; treq->ts_off = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4f4310a36a04..84ad50218255 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -109,9 +109,9 @@ static u32 tcp_v6_init_seq(const struct sk_buff *skb) tcp_hdr(skb)->source); } -static u32 tcp_v6_init_ts_off(const struct sk_buff *skb) +static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); } @@ -292,7 +292,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); - tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32, + tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), + np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); } @@ -949,7 +950,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp + tcptw->tw_ts_offset, + tcp_time_stamp_raw() + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel)); @@ -971,7 +972,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp + tcp_rsk(req)->ts_off, + tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); @@ -1248,9 +1249,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (tcp_filter(sk, skb)) - goto discard; - /* * socket locking is here for SMP purposes as backlog rcv * is currently called with bh processing disabled. 
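
The syncookies and tcp_ipv6.c hunks above finish moving TCP timestamps onto per-flow offsets: the raw clock (tcp_time_stamp_raw()) is only ever emitted as raw + ts_off, where the offset comes from a keyed hash over the address pair that now also mixes in the network namespace (secure_tcpv6_ts_off(net, ...)). A compile-and-run sketch of the shape of that idea; the hash below is a deliberately weak stand-in for the kernel's siphash and exists only to make the example self-contained:

    #define _POSIX_C_SOURCE 199309L
    #include <stdint.h>
    #include <time.h>

    /* Toy stand-in for the keyed hash behind secure_tcpv6_ts_off(); the
     * kernel uses siphash with per-boot (and now per-netns) key material. */
    static uint32_t flow_key_hash(const uint32_t saddr[4],
                                  const uint32_t daddr[4], uint32_t ns_salt)
    {
        uint32_t h = ns_salt;
        for (int i = 0; i < 4; i++)
            h = h * 31 + saddr[i] + daddr[i] * 17;
        return h;
    }

    /* Raw millisecond clock, analogous to tcp_time_stamp_raw(). */
    static uint32_t ts_raw(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint32_t)(ts.tv_sec * 1000 + ts.tv_nsec / 1000000);
    }

    /* The value placed in the TCP timestamp option: a shared raw clock
     * plus a per-flow offset, so a peer cannot correlate clocks across
     * flows or across namespaces. */
    uint32_t tsval_for_flow(const uint32_t saddr[4], const uint32_t daddr[4],
                            uint32_t ns_salt)
    {
        return ts_raw() + flow_key_hash(saddr, daddr, ns_salt);
    }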
@@ -1909,6 +1907,7 @@ struct proto tcpv6_prot = { .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, + .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 06ec39b79609..2e9b52bded2d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -455,7 +455,8 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { + if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, + udp_skb_destructor)) { if (is_udp4) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index deca20fb2ce2..da49191f7ad0 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1985,7 +1985,7 @@ static int kcm_create(struct net *net, struct socket *sock, return 0; } -static struct net_proto_family kcm_family_ops = { +static const struct net_proto_family kcm_family_ops = { .family = PF_KCM, .create = kcm_create, .owner = THIS_MODULE, diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 1b7a4daf283c..3a0282188ad6 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -449,44 +449,21 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, buf_size, true, false); } -void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid) +void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, + const u8 *addr, unsigned int bit) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; - struct ieee80211_rx_agg *rx_agg; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) - return; - - rx_agg = (struct ieee80211_rx_agg *) &skb->cb; - memcpy(&rx_agg->addr, addr, ETH_ALEN); - rx_agg->tid = tid; - - skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_START; - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); -} -EXPORT_SYMBOL(ieee80211_start_rx_ba_session_offl); - -void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct ieee80211_rx_agg *rx_agg; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) - return; + struct sta_info *sta; - rx_agg = (struct ieee80211_rx_agg *) &skb->cb; - memcpy(&rx_agg->addr, addr, ETH_ALEN); - rx_agg->tid = tid; + rcu_read_lock(); + sta = sta_info_get_bss(sdata, addr); + if (!sta) + goto unlock; - skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_STOP; - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); + set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + unlock: + rcu_read_unlock(); } -EXPORT_SYMBOL(ieee80211_stop_rx_ba_session_offl); +EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4a388fe8c2d1..f9eb2486d550 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1876,6 +1876,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, ifmsh->user_mpm = setup->user_mpm; ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; + ifmsh->userspace_handles_dfs = setup->userspace_handles_dfs; if (setup->is_authenticated) ifmsh->security |= 
IEEE80211_MESH_SEC_AUTHED; if (setup->is_secure) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 42601820db20..b15412c21ac9 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -154,6 +154,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, p += scnprintf(p, bufsz+buf-p, + "target %uus interval %uus ecn %s\n", + codel_time_to_us(sta->cparams.target), + codel_time_to_us(sta->cparams.interval), + sta->cparams.ecn ? "yes" : "no"); + p += scnprintf(p, + bufsz+buf-p, "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n"); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 6ca5442b1e03..9e71226c2d25 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -331,6 +331,18 @@ void ieee80211_ba_session_work(struct work_struct *work) sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); + if (test_and_clear_bit(tid, + sta->ampdu_mlme.tid_rx_manage_offl)) + __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, + IEEE80211_MAX_AMPDU_BUF, + false, true); + + if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, + sta->ampdu_mlme.tid_rx_manage_offl)) + ___ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + 0, false); + spin_lock_bh(&sta->lock); tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 364d4e137649..660ac6a426f4 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -808,7 +808,6 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, } memset(&params, 0, sizeof(params)); - memset(&csa_ie, 0, sizeof(csa_ie)); err = ieee80211_parse_ch_switch_ie(sdata, elems, ifibss->chandef.chan->band, sta_flags, ifibss->bssid, &csa_ie); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5e002f62c235..2197c62a0a6e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -643,6 +643,8 @@ struct ieee80211_if_mesh { unsigned long wrkq_flags; unsigned long mbss_changed; + bool userspace_handles_dfs; + u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; size_t mesh_id_len; /* Active Path Selection Protocol Identifier */ @@ -1029,17 +1031,6 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif) return shift; } -struct ieee80211_rx_agg { - u8 addr[ETH_ALEN]; - u16 tid; -}; - -enum sdata_queue_type { - IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, - IEEE80211_SDATA_QUEUE_RX_AGG_START = 3, - IEEE80211_SDATA_QUEUE_RX_AGG_STOP = 4, -}; - enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, @@ -1432,6 +1423,7 @@ struct ieee80211_csa_ie { u8 count; u8 ttl; u16 pre_value; + u16 reason_code; }; /* Parsed Information Elements */ @@ -2057,6 +2049,8 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, const struct cfg80211_chan_def *chandef, u16 prot_mode, bool rifs_mode); +void ieee80211_ie_build_wide_bw_cs(u8 *pos, + const struct cfg80211_chan_def *chandef); u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f5f50150ba1c..9228ac73c429 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1237,7 +1237,6 @@ static void ieee80211_iface_work(struct work_struct *work) struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; -
struct ieee80211_rx_agg *rx_agg; if (!ieee80211_sdata_running(sdata)) return; @@ -1252,28 +1251,8 @@ static void ieee80211_iface_work(struct work_struct *work) while ((skb = skb_dequeue(&sdata->skb_queue))) { struct ieee80211_mgmt *mgmt = (void *)skb->data; - if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) { - rx_agg = (void *)&skb->cb; - mutex_lock(&local->sta_mtx); - sta = sta_info_get_bss(sdata, rx_agg->addr); - if (sta) - __ieee80211_start_rx_ba_session(sta, - 0, 0, 0, 1, rx_agg->tid, - IEEE80211_MAX_AMPDU_BUF, - false, true); - mutex_unlock(&local->sta_mtx); - } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_STOP) { - rx_agg = (void *)&skb->cb; - mutex_lock(&local->sta_mtx); - sta = sta_info_get_bss(sdata, rx_agg->addr); - if (sta) - __ieee80211_stop_rx_ba_session(sta, - rx_agg->tid, - WLAN_BACK_RECIPIENT, 0, - false); - mutex_unlock(&local->sta_mtx); - } else if (ieee80211_is_action(mgmt->frame_control) && - mgmt->u.action.category == WLAN_CATEGORY_BACK) { + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_BACK) { int len = skb->len; mutex_lock(&local->sta_mtx); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 737e1f082b0d..ad5d1cf39190 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -690,6 +690,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 2 + sizeof(struct ieee80211_channel_sw_ie) + /* Mesh Channel Switch Parameters */ 2 + sizeof(struct ieee80211_mesh_chansw_params_ie) + + /* Channel Switch Wrapper + Wide Bandwidth CSA IE */ + 2 + 2 + sizeof(struct ieee80211_wide_bw_chansw_ie) + + 2 + sizeof(struct ieee80211_sec_chan_offs_ie) + 2 + 8 + /* supported rates */ 2 + 3; /* DS params */ tail_len = 2 + (IEEE80211_MAX_SUPP_RATES - 8) + @@ -736,8 +739,12 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) rcu_read_lock(); csa = rcu_dereference(ifmsh->csa); if (csa) { - pos = skb_put(skb, 13); - memset(pos, 0, 13); + enum nl80211_channel_type ct; + struct cfg80211_chan_def *chandef; + int ie_len = 2 + sizeof(struct ieee80211_channel_sw_ie) + + 2 + sizeof(struct ieee80211_mesh_chansw_params_ie); + + pos = skb_put_zero(skb, ie_len); *pos++ = WLAN_EID_CHANNEL_SWITCH; *pos++ = 3; *pos++ = 0x0; @@ -760,6 +767,37 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) pos += 2; put_unaligned_le16(ifmsh->pre_value, pos); pos += 2; + + switch (csa->settings.chandef.width) { + case NL80211_CHAN_WIDTH_40: + ie_len = 2 + sizeof(struct ieee80211_sec_chan_offs_ie); + pos = skb_put_zero(skb, ie_len); + + *pos++ = WLAN_EID_SECONDARY_CHANNEL_OFFSET; /* EID */ + *pos++ = 1; /* len */ + ct = cfg80211_get_chandef_type(&csa->settings.chandef); + if (ct == NL80211_CHAN_HT40PLUS) + *pos++ = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + else + *pos++ = IEEE80211_HT_PARAM_CHA_SEC_BELOW; + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + /* Channel Switch Wrapper + Wide Bandwidth CSA IE */ + ie_len = 2 + 2 + + sizeof(struct ieee80211_wide_bw_chansw_ie); + pos = skb_put_zero(skb, ie_len); + + *pos++ = WLAN_EID_CHANNEL_SWITCH_WRAPPER; /* EID */ + *pos++ = 5; /* len */ + /* put sub IE */ + chandef = &csa->settings.chandef; + ieee80211_ie_build_wide_bw_cs(pos, chandef); + break; + default: + break; + } } rcu_read_unlock(); @@ -916,6 +954,21 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_configure_filter(local); } +static void ieee80211_mesh_csa_mark_radar(struct ieee80211_sub_if_data *sdata) +{ + int err; + + /* if the current 
channel is a DFS channel, mark the channel as + * unavailable. + */ + err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, + &sdata->vif.bss_conf.chandef, + NL80211_IFTYPE_MESH_POINT); + if (err > 0) + cfg80211_radar_event(sdata->local->hw.wiphy, + &sdata->vif.bss_conf.chandef, GFP_ATOMIC); +} + static bool ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, bool beacon) @@ -933,19 +986,20 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, if (!sband) return false; - sta_flags = IEEE80211_STA_DISABLE_VHT; + sta_flags = 0; switch (sdata->vif.bss_conf.chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: sta_flags |= IEEE80211_STA_DISABLE_HT; case NL80211_CHAN_WIDTH_20: sta_flags |= IEEE80211_STA_DISABLE_40MHZ; + case NL80211_CHAN_WIDTH_40: + sta_flags |= IEEE80211_STA_DISABLE_VHT; break; default: break; } memset(&params, 0, sizeof(params)); - memset(&csa_ie, 0, sizeof(csa_ie)); err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band, sta_flags, sdata->vif.addr, &csa_ie); @@ -954,11 +1008,19 @@ if (err) return false; + /* Mark the channel unavailable if the reason for the switch is + * regulatory. + */ + if (csa_ie.reason_code == WLAN_REASON_MESH_CHAN_REGULATORY) + ieee80211_mesh_csa_mark_radar(sdata); + params.chandef = csa_ie.chandef; params.count = csa_ie.count; if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &params.chandef, - IEEE80211_CHAN_DISABLED)) { + IEEE80211_CHAN_DISABLED) || + !cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef, + NL80211_IFTYPE_MESH_POINT)) { sdata_info(sdata, "mesh STA %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), aborting\n", sdata->vif.addr, @@ -974,9 +1036,16 @@ NL80211_IFTYPE_MESH_POINT); if (err < 0) return false; - if (err > 0) - /* TODO: DFS not (yet) supported */ + if (err > 0 && !ifmsh->userspace_handles_dfs) { + sdata_info(sdata, + "mesh STA %pM switches to channel requiring DFS (%d MHz, width:%d, CF1/2: %d/%d MHz), aborting\n", + sdata->vif.addr, + params.chandef.chan->center_freq, + params.chandef.width, + params.chandef.center_freq1, + params.chandef.center_freq2); return false; + } params.radar_required = err; @@ -1233,7 +1302,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.action.u.chan_switch.variable; baselen = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); - ieee802_11_parse_elems(pos, len - baselen, false, &elems); + ieee802_11_parse_elems(pos, len - baselen, true, &elems); ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl; if (!--ifmsh->chsw_ttl) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 1131cd504a15..82cfd232a25e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -264,8 +264,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, band = sband->band; /* capability info */ - pos = skb_put(skb, 2); - memset(pos, 0, 2); + pos = skb_put_zero(skb, 2); if (action == WLAN_SP_MESH_PEERING_CONFIRM) { /* AID */ pos = skb_put(skb, 2); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cc8e6ea1b27e..1929bce8e518 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1122,7 +1122,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; current_band = cbss->channel->band; - memset(&csa_ie, 0, sizeof(csa_ie)); res =
ieee80211_parse_ch_switch_ie(sdata, elems, current_band, ifmgd->flags, ifmgd->associated->bssid, &csa_ie); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index ea1f4315c521..76f303fda3ed 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -943,6 +943,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw, drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); + ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta)); + return 0; } EXPORT_SYMBOL(rate_control_set_rates); @@ -991,4 +993,3 @@ void rate_control_deinitialize(struct ieee80211_local *local) local->rate_ctrl = NULL; rate_control_free(local, ref); } - diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3674fe3d67dc..004a2283c5d9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -237,7 +237,6 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, if (!skb) return; - skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&sdata->skb_queue, skb); ieee80211_queue_work(&sdata->local->hw, &sdata->work); } @@ -1217,7 +1216,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&rx->sdata->skb_queue, skb); ieee80211_queue_work(&local->hw, &rx->sdata->work); return; @@ -3104,7 +3102,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) return RX_QUEUED; queue: - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&local->hw, &sdata->work); if (rx->sta) @@ -3250,7 +3247,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) } /* queue up frame and kick off work to process it */ - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&rx->local->hw, &sdata->work); if (rx->sta) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 0782e486fe89..bf8f5dcea1c4 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -36,6 +36,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; int secondary_channel_offset = -1; + memset(csa_ie, 0, sizeof(*csa_ie)); + sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; @@ -76,6 +78,11 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags; csa_ie->pre_value = le16_to_cpu( elems->mesh_chansw_params_ie->mesh_pre_value); + + if (elems->mesh_chansw_params_ie->mesh_flags & + WLAN_EID_CHAN_SWITCH_PARAM_REASON) + csa_ie->reason_code = le16_to_cpu( + elems->mesh_chansw_params_ie->mesh_reason); } new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 403e3cc58b57..46e1809356f6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -20,6 +20,7 @@ #include <linux/timer.h> #include <linux/rtnetlink.h> +#include <net/codel.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -425,6 +426,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; + sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; + sta->cparams.target = MS2TIME(20); + sta->cparams.interval = 
MS2TIME(100); + sta->cparams.ecn = true; + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; @@ -2310,3 +2316,27 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) return stats->last_rx; return sta->status_stats.last_ack; } + +static void sta_update_codel_params(struct sta_info *sta, u32 thr) +{ + if (!sta->sdata->local->ops->wake_tx_queue) + return; + + if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { + sta->cparams.target = MS2TIME(50); + sta->cparams.interval = MS2TIME(300); + sta->cparams.ecn = false; + } else { + sta->cparams.target = MS2TIME(20); + sta->cparams.interval = MS2TIME(100); + sta->cparams.ecn = true; + } +} + +void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, + u32 thr) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + + sta_update_codel_params(sta, thr); +} diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ea0747d6a6da..3acbdfa9f649 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -233,6 +233,8 @@ struct tid_ampdu_rx { * RX timer expired until the work for it runs * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the * driver requested to close until the work for it runs + * @tid_rx_manage_offl: bitmap indicating which BA sessions were requested + * to be treated as started/stopped due to offloading * @agg_session_valid: bitmap indicating which TID has a rx BA session open on * @unexpected_agg: bitmap indicating which TID already sent a delBA due to * unexpected aggregation related frames outside a session @@ -250,6 +252,7 @@ struct sta_ampdu_mlme { u8 tid_rx_token[IEEE80211_NUM_TIDS]; unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; + unsigned long tid_rx_manage_offl[BITS_TO_LONGS(2 * IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; /* tx */ @@ -396,6 +399,14 @@ struct ieee80211_sta_rx_stats { }; /** + * The bandwidth threshold below which the per-station CoDel parameters will be + * scaled to be more lenient (to prevent starvation of slow stations). This + * value will be scaled by the number of active stations when it is being + * applied. + */ +#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */ + +/** * struct sta_info - STA information * * This structure collects information about a station that @@ -448,6 +459,7 @@ struct ieee80211_sta_rx_stats { * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. * @cipher_scheme: optional cipher scheme for this station + * @cparams: CoDel parameters for this station. 
* @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @fast_tx: TX fastpath information * @fast_rx: RX fastpath information @@ -551,6 +563,8 @@ struct sta_info { enum ieee80211_smps_mode known_smps_mode; const struct ieee80211_cipher_scheme *cipher_scheme; + struct codel_params cparams; + u8 reserved_tid; struct cfg80211_chan_def tdls_chandef; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index be47ac5cd8c8..a9fa6ee57e8f 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -546,6 +546,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, skb->wifi_acked_valid = 1; skb->wifi_acked = acked; } + + ieee80211_led_tx(local); } /* @@ -823,8 +825,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, } } - ieee80211_led_tx(local); - /* SNMP counters * Fragments are passed to low-level drivers as separate skbs, so these * are actually fragments, not frames. Update frame counters only for diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 0d645bc148d0..3d9ac17af407 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -92,16 +92,19 @@ __field(u16, ssn) \ __field(u8, buf_size) \ __field(bool, amsdu) \ - __field(u16, timeout) + __field(u16, timeout) \ + __field(u16, action) #define AMPDU_ACTION_ASSIGN STA_NAMED_ASSIGN(params->sta); \ __entry->tid = params->tid; \ __entry->ssn = params->ssn; \ __entry->buf_size = params->buf_size; \ __entry->amsdu = params->amsdu; \ - __entry->timeout = params->timeout; -#define AMPDU_ACTION_PR_FMT STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d" + __entry->timeout = params->timeout; \ + __entry->action = params->action; +#define AMPDU_ACTION_PR_FMT STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d action %d" #define AMPDU_ACTION_PR_ARG STA_PR_ARG, __entry->tid, __entry->ssn, \ - __entry->buf_size, __entry->amsdu, __entry->timeout + __entry->buf_size, __entry->amsdu, __entry->timeout, \ + __entry->action /* * Tracing for driver callbacks. 
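
The sta_info.c and debugfs hunks earlier in this series give each station its own CoDel parameter set, relaxed whenever the station's expected throughput falls below STA_SLOW_THRESHOLD scaled by the active station count, and the tx.c hunk just below switches fq_tin_dequeue_func() from the device-wide cparams to the per-station copy. The scaling rule is small enough to restate as a standalone sketch; the MS2TIME stand-in here is assumed for the sketch (the kernel's operates on codel time units):

    #include <stdbool.h>
    #include <stdint.h>

    #define MS2TIME(ms) ((uint32_t)(ms) * 1000u) /* stand-in: microseconds */
    #define STA_SLOW_THRESHOLD 6000u             /* kbit/s, as in sta_info.h */

    struct codel_params {
        uint32_t target;   /* acceptable standing queue delay */
        uint32_t interval; /* measurement window */
        bool ecn;          /* mark instead of drop */
    };

    /* Mirror of the logic in sta_update_codel_params(): a station whose
     * expected throughput is under the scaled threshold gets a more
     * lenient target/interval, and plain drops instead of ECN marks. */
    void update_codel_params(struct codel_params *p, uint32_t thr_kbps,
                             uint32_t num_sta)
    {
        if (thr_kbps && thr_kbps < STA_SLOW_THRESHOLD * num_sta) {
            p->target = MS2TIME(50);
            p->interval = MS2TIME(300);
            p->ecn = false;
        } else {
            p->target = MS2TIME(20);
            p->interval = MS2TIME(100);
            p->ecn = true;
        }
    }

Scaling the threshold by num_sta matches the intent stated in the sta_info.h comment above: as more stations share airtime, each one's fair share shrinks, so the cutoff for "slow" rises proportionally.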
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 04b22f8982fe..b8dc41191835 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1340,9 +1340,16 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq, local = container_of(fq, struct ieee80211_local, fq); txqi = container_of(tin, struct txq_info, tin); - cparams = &local->cparams; cstats = &txqi->cstats; + if (txqi->txq.sta) { + struct sta_info *sta = container_of(txqi->txq.sta, + struct sta_info, sta); + cparams = &sta->cparams; + } else { + cparams = &local->cparams; + } + if (flow == &txqi->def_flow) cvars = &txqi->def_cvars; else diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ac9ac6c35594..de0f1cdb64d4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2414,6 +2414,35 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, return pos + sizeof(struct ieee80211_ht_operation); } +void ieee80211_ie_build_wide_bw_cs(u8 *pos, + const struct cfg80211_chan_def *chandef) +{ + *pos++ = WLAN_EID_WIDE_BW_CHANNEL_SWITCH; /* EID */ + *pos++ = 3; /* IE length */ + /* New channel width */ + switch (chandef->width) { + case NL80211_CHAN_WIDTH_80: + *pos++ = IEEE80211_VHT_CHANWIDTH_80MHZ; + break; + case NL80211_CHAN_WIDTH_160: + *pos++ = IEEE80211_VHT_CHANWIDTH_160MHZ; + break; + case NL80211_CHAN_WIDTH_80P80: + *pos++ = IEEE80211_VHT_CHANWIDTH_80P80MHZ; + break; + default: + *pos++ = IEEE80211_VHT_CHANWIDTH_USE_HT; + } + + /* new center frequency segment 0 */ + *pos++ = ieee80211_frequency_to_channel(chandef->center_freq1); + /* new center frequency segment 1 */ + if (chandef->center_freq2) + *pos++ = ieee80211_frequency_to_channel(chandef->center_freq2); + else + *pos++ = 0; +} + u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef) { @@ -2964,6 +2993,7 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, skb = dev_alloc_skb(local->tx_headroom + hdr_len + 5 + /* channel switch announcement element */ 3 + /* secondary channel offset element */ + 5 + /* wide bandwidth channel switch announcement */ 8); /* mesh channel switch parameters element */ if (!skb) return -ENOMEM; @@ -3022,6 +3052,13 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, pos += 2; } + if (csa_settings->chandef.width == NL80211_CHAN_WIDTH_80 || + csa_settings->chandef.width == NL80211_CHAN_WIDTH_80P80 || + csa_settings->chandef.width == NL80211_CHAN_WIDTH_160) { + skb_put(skb, 5); + ieee80211_ie_build_wide_bw_cs(pos, &csa_settings->chandef); + } + ieee80211_tx_skb(sdata, skb); return 0; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7b05fd1497ce..b51582d92740 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -684,6 +684,54 @@ errout: return err; } +static int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, + u8 via_addr[], struct netlink_ext_ack *extack) +{ + struct rtvia *via = nla_data(nla); + int err = -EINVAL; + int alen; + + if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Invalid attribute length for RTA_VIA"); + goto errout; + } + alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + if (alen > MAX_VIA_ALEN) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Invalid address length for RTA_VIA"); + goto errout; + } + + /* Validate the address family */ + switch (via->rtvia_family) { + case AF_PACKET: + *via_table = NEIGH_LINK_TABLE; + break; + case AF_INET: + *via_table = NEIGH_ARP_TABLE; + if (alen != 4) + goto errout; + 
break; + case AF_INET6: + *via_table = NEIGH_ND_TABLE; + if (alen != 16) + goto errout; + break; + default: + /* Unsupported address family */ + goto errout; + } + + memcpy(via_addr, via->rtvia_addr, alen); + *via_alen = alen; + err = 0; + +errout: + return err; +} + static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, struct mpls_route *rt) { @@ -695,8 +743,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, if (!nh) return -ENOMEM; - err = -EINVAL; - nh->nh_labels = cfg->rc_output_labels; for (i = 0; i < nh->nh_labels; i++) nh->nh_label[i] = cfg->rc_output_label[i]; @@ -720,7 +766,8 @@ errout: static int mpls_nh_build(struct net *net, struct mpls_route *rt, struct mpls_nh *nh, int oif, struct nlattr *via, - struct nlattr *newdst, u8 max_labels) + struct nlattr *newdst, u8 max_labels, + struct netlink_ext_ack *extack) { int err = -ENOMEM; @@ -728,15 +775,15 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, goto errout; if (newdst) { - err = nla_get_labels(newdst, max_labels, - &nh->nh_labels, nh->nh_label); + err = nla_get_labels(newdst, max_labels, &nh->nh_labels, + nh->nh_label, extack); if (err) goto errout; } if (via) { err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table, - __mpls_nh_via(rt, nh)); + __mpls_nh_via(rt, nh), extack); if (err) goto errout; } else { @@ -782,7 +829,8 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, nla = nla_find(attrs, attrlen, RTA_NEWDST); if (nla && - nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0) + nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, + NULL, NULL) != 0) return 0; *max_labels = max_t(u8, *max_labels, n_labels); @@ -802,7 +850,8 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, } static int mpls_nh_build_multi(struct mpls_route_config *cfg, - struct mpls_route *rt, u8 max_labels) + struct mpls_route *rt, u8 max_labels, + struct netlink_ext_ack *extack) { struct rtnexthop *rtnh = cfg->rc_mp; struct nlattr *nla_via, *nla_newdst; @@ -836,7 +885,7 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, rtnh->rtnh_ifindex, nla_via, nla_newdst, - max_labels); + max_labels, extack); if (err) goto errout; @@ -855,7 +904,28 @@ errout: return err; } -static int mpls_route_add(struct mpls_route_config *cfg) +static bool mpls_label_ok(struct net *net, unsigned int index, + struct netlink_ext_ack *extack) +{ + /* Reserved labels may not be set */ + if (index < MPLS_LABEL_FIRST_UNRESERVED) { + NL_SET_ERR_MSG(extack, + "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher"); + return false; + } + + /* The full 20 bit range may not be supported. */ + if (index >= net->mpls.platform_labels) { + NL_SET_ERR_MSG(extack, + "Label >= configured maximum in platform_labels"); + return false; + } + + return true; +} + +static int mpls_route_add(struct mpls_route_config *cfg, + struct netlink_ext_ack *extack) { struct mpls_route __rcu **platform_label; struct net *net = cfg->rc_nlinfo.nl_net; @@ -874,18 +944,15 @@ static int mpls_route_add(struct mpls_route_config *cfg) index = find_free_label(net); } - /* Reserved labels may not be set */ - if (index < MPLS_LABEL_FIRST_UNRESERVED) - goto errout; - - /* The full 20 bit range may not be supported. 
*/ - if (index >= net->mpls.platform_labels) + if (!mpls_label_ok(net, index, extack)) goto errout; /* Append makes no sense with mpls */ err = -EOPNOTSUPP; - if (cfg->rc_nlflags & NLM_F_APPEND) + if (cfg->rc_nlflags & NLM_F_APPEND) { + NL_SET_ERR_MSG(extack, "MPLS does not support route append"); goto errout; + } err = -EEXIST; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -912,8 +979,10 @@ static int mpls_route_add(struct mpls_route_config *cfg) nhs = 1; } - if (nhs == 0) + if (nhs == 0) { + NL_SET_ERR_MSG(extack, "Route does not contain a nexthop"); goto errout; + } err = -ENOMEM; rt = mpls_rt_alloc(nhs, max_via_alen, max_labels); @@ -927,7 +996,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) rt->rt_ttl_propagate = cfg->rc_ttl_propagate; if (cfg->rc_mp) - err = mpls_nh_build_multi(cfg, rt, max_labels); + err = mpls_nh_build_multi(cfg, rt, max_labels, extack); else err = mpls_nh_build_from_cfg(cfg, rt); if (err) @@ -943,7 +1012,8 @@ errout: return err; } -static int mpls_route_del(struct mpls_route_config *cfg) +static int mpls_route_del(struct mpls_route_config *cfg, + struct netlink_ext_ack *extack) { struct net *net = cfg->rc_nlinfo.nl_net; unsigned index; @@ -951,12 +1021,7 @@ static int mpls_route_del(struct mpls_route_config *cfg) index = cfg->rc_label; - /* Reserved labels may not be removed */ - if (index < MPLS_LABEL_FIRST_UNRESERVED) - goto errout; - - /* The full 20 bit range may not be supported */ - if (index >= net->mpls.platform_labels) + if (!mpls_label_ok(net, index, extack)) goto errout; mpls_route_update(net, index, NULL, &cfg->rc_nlinfo); @@ -1541,8 +1606,8 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, } EXPORT_SYMBOL_GPL(nla_put_labels); -int nla_get_labels(const struct nlattr *nla, - u8 max_labels, u8 *labels, u32 label[]) +int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, + u32 label[], struct netlink_ext_ack *extack) { unsigned len = nla_len(nla); struct mpls_shim_hdr *nla_label; @@ -1553,13 +1618,18 @@ int nla_get_labels(const struct nlattr *nla, /* len needs to be an even multiple of 4 (the label size). Number * of labels is a u8 so check for overflow. */ - if (len & 3 || len / 4 > 255) + if (len & 3 || len / 4 > 255) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Invalid length for labels attribute"); return -EINVAL; + } /* Limit the number of new labels allowed */ nla_labels = len/4; - if (nla_labels > max_labels) + if (nla_labels > max_labels) { + NL_SET_ERR_MSG(extack, "Too many labels"); return -EINVAL; + } /* when label == NULL, caller wants number of labels */ if (!label) @@ -1574,8 +1644,29 @@ int nla_get_labels(const struct nlattr *nla, /* Ensure the bottom of stack flag is properly set * and ttl and tc are both clear. */ - if ((dec.bos != bos) || dec.ttl || dec.tc) + if (dec.ttl) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "TTL in label must be 0"); + return -EINVAL; + } + + if (dec.tc) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Traffic class in label must be 0"); return -EINVAL; + } + + if (dec.bos != bos) { + NL_SET_BAD_ATTR(extack, nla); + if (bos) { + NL_SET_ERR_MSG(extack, + "BOS bit must be set in first label"); + } else { + NL_SET_ERR_MSG(extack, + "BOS bit can only be set in first label"); + } + return -EINVAL; + } switch (dec.label) { case MPLS_LABEL_IMPLNULL: @@ -1583,6 +1674,8 @@ int nla_get_labels(const struct nlattr *nla, * assign and distribute, but which never * actually appears in the encapsulation. 
*/ + NL_SET_ERR_MSG_ATTR(extack, nla, + "Implicit NULL Label (3) cannot be used in encapsulation"); return -EINVAL; } @@ -1594,50 +1687,10 @@ out: } EXPORT_SYMBOL_GPL(nla_get_labels); -int nla_get_via(const struct nlattr *nla, u8 *via_alen, - u8 *via_table, u8 via_addr[]) -{ - struct rtvia *via = nla_data(nla); - int err = -EINVAL; - int alen; - - if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) - goto errout; - alen = nla_len(nla) - - offsetof(struct rtvia, rtvia_addr); - if (alen > MAX_VIA_ALEN) - goto errout; - - /* Validate the address family */ - switch (via->rtvia_family) { - case AF_PACKET: - *via_table = NEIGH_LINK_TABLE; - break; - case AF_INET: - *via_table = NEIGH_ARP_TABLE; - if (alen != 4) - goto errout; - break; - case AF_INET6: - *via_table = NEIGH_ND_TABLE; - if (alen != 16) - goto errout; - break; - default: - /* Unsupported address family */ - goto errout; - } - - memcpy(via_addr, via->rtvia_addr, alen); - *via_alen = alen; - err = 0; - -errout: - return err; -} - -static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, - struct mpls_route_config *cfg) +static int rtm_to_route_config(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct mpls_route_config *cfg, + struct netlink_ext_ack *extack) { struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; @@ -1645,35 +1698,54 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, int err; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy, - NULL); + extack); if (err < 0) goto errout; err = -EINVAL; rtm = nlmsg_data(nlh); - if (rtm->rtm_family != AF_MPLS) + if (rtm->rtm_family != AF_MPLS) { + NL_SET_ERR_MSG(extack, "Invalid address family in rtmsg"); goto errout; - if (rtm->rtm_dst_len != 20) + } + if (rtm->rtm_dst_len != 20) { + NL_SET_ERR_MSG(extack, "rtm_dst_len must be 20 for MPLS"); goto errout; - if (rtm->rtm_src_len != 0) + } + if (rtm->rtm_src_len != 0) { + NL_SET_ERR_MSG(extack, "rtm_src_len must be 0 for MPLS"); goto errout; - if (rtm->rtm_tos != 0) + } + if (rtm->rtm_tos != 0) { + NL_SET_ERR_MSG(extack, "rtm_tos must be 0 for MPLS"); goto errout; - if (rtm->rtm_table != RT_TABLE_MAIN) + } + if (rtm->rtm_table != RT_TABLE_MAIN) { + NL_SET_ERR_MSG(extack, + "MPLS only supports the main route table"); goto errout; + } /* Any value is acceptable for rtm_protocol */ /* As mpls uses destination specific addresses * (or source specific address in the case of multicast) * all addresses have universal scope. 
*/ - if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) + if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) { + NL_SET_ERR_MSG(extack, + "Invalid route scope - MPLS only supports UNIVERSE"); goto errout; - if (rtm->rtm_type != RTN_UNICAST) + } + if (rtm->rtm_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, + "Invalid route type - MPLS only supports UNICAST"); goto errout; - if (rtm->rtm_flags != 0) + } + if (rtm->rtm_flags != 0) { + NL_SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS"); goto errout; + } cfg->rc_label = LABEL_NOT_SPECIFIED; cfg->rc_protocol = rtm->rtm_protocol; @@ -1696,26 +1768,26 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, case RTA_NEWDST: if (nla_get_labels(nla, MAX_NEW_LABELS, &cfg->rc_output_labels, - cfg->rc_output_label)) + cfg->rc_output_label, extack)) goto errout; break; case RTA_DST: { u8 label_count; if (nla_get_labels(nla, 1, &label_count, - &cfg->rc_label)) + &cfg->rc_label, extack)) goto errout; - /* Reserved labels may not be set */ - if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED) + if (!mpls_label_ok(cfg->rc_nlinfo.nl_net, + cfg->rc_label, extack)) goto errout; - break; } case RTA_VIA: { if (nla_get_via(nla, &cfg->rc_via_alen, - &cfg->rc_via_table, cfg->rc_via)) + &cfg->rc_via_table, cfg->rc_via, + extack)) goto errout; break; } @@ -1729,14 +1801,18 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, { u8 ttl_propagate = nla_get_u8(nla); - if (ttl_propagate > 1) + if (ttl_propagate > 1) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "RTA_TTL_PROPAGATE can only be 0 or 1"); goto errout; + } cfg->rc_ttl_propagate = ttl_propagate ? MPLS_TTL_PROP_ENABLED : MPLS_TTL_PROP_DISABLED; break; } default: + NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute"); /* Unsupported attribute */ goto errout; } @@ -1757,11 +1833,11 @@ static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (!cfg) return -ENOMEM; - err = rtm_to_route_config(skb, nlh, cfg); + err = rtm_to_route_config(skb, nlh, cfg, extack); if (err < 0) goto out; - err = mpls_route_del(cfg); + err = mpls_route_del(cfg, extack); out: kfree(cfg); @@ -1779,11 +1855,11 @@ static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (!cfg) return -ENOMEM; - err = rtm_to_route_config(skb, nlh, cfg); + err = rtm_to_route_config(skb, nlh, cfg, extack); if (err < 0) goto out; - err = mpls_route_add(cfg); + err = mpls_route_add(cfg, extack); out: kfree(cfg); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 4db6a5971322..cf65aec2e551 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -203,9 +203,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, - u32 label[]); -int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, - u8 via[]); + u32 label[], struct netlink_ext_ack *extack); bool mpls_output_possible(const struct net_device *dev); unsigned int mpls_dev_mtu(const struct net_device *dev); bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 369c7a23c86c..6e558a419f60 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -159,7 +159,8 @@ drop: static int mpls_build_state(struct nlattr *nla, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct 
mpls_iptunnel_encap *tun_encap_info; struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; @@ -168,17 +169,18 @@ static int mpls_build_state(struct nlattr *nla, int ret; ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, - mpls_iptunnel_policy, NULL); + mpls_iptunnel_policy, extack); if (ret < 0) return ret; - if (!tb[MPLS_IPTUNNEL_DST]) + if (!tb[MPLS_IPTUNNEL_DST]) { + NL_SET_ERR_MSG(extack, "MPLS_IPTUNNEL_DST attribute is missing"); return -EINVAL; - + } /* determine number of labels */ - if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], - MAX_NEW_LABELS, &n_labels, NULL)) + if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, + &n_labels, NULL, extack)) return -EINVAL; newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + @@ -188,7 +190,8 @@ static int mpls_build_state(struct nlattr *nla, tun_encap_info = mpls_lwtunnel_encap(newts); ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels, - &tun_encap_info->labels, tun_encap_info->label); + &tun_encap_info->labels, tun_encap_info->label, + extack); if (ret) goto errout; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index a504e87c6ddf..49bd8bb16b18 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -152,7 +152,7 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, struct synproxy_options *opts) { opts->tsecr = opts->tsval; - opts->tsval = tcp_time_stamp & ~0x3f; + opts->tsval = tcp_time_stamp_raw() & ~0x3f; if (opts->options & XT_SYNPROXY_OPT_WSCALE) { opts->tsval |= opts->wscale; diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 54e40fa47822..d3e594eb36d0 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -48,7 +48,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto, return rc; } -static struct net_proto_family nfc_sock_family_ops = { +static const struct net_proto_family nfc_sock_family_ops = { .owner = THIS_MODULE, .family = PF_NFC, .create = nfc_sock_create, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 7b17da9a94a0..9ddc9f8412a2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -453,7 +453,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, /* Complete checksum if needed */ if (skb->ip_summed == CHECKSUM_PARTIAL && - (err = skb_checksum_help(skb))) + (err = skb_csum_hwoffload_help(skb, 0))) goto out; /* Older versions of OVS user space enforce alignment of the last diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e3eeed19cc7a..f9349a495caf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -188,7 +188,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) struct packet_sock; -static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); @@ -196,8 +195,7 @@ static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status); static void packet_increment_head(struct packet_ring_buffer *buff); -static int prb_curr_blk_in_use(struct tpacket_kbdq_core *, - struct tpacket_block_desc *); +static int prb_curr_blk_in_use(struct tpacket_block_desc *); static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, struct packet_sock *); static void prb_retire_current_block(struct tpacket_kbdq_core *, @@ -721,7 +719,7 @@ static void 
prb_retire_rx_blk_timer_expired(unsigned long data) /* Case 1. Queue was frozen because user-space was * lagging behind. */ - if (prb_curr_blk_in_use(pkc, pbd)) { + if (prb_curr_blk_in_use(pbd)) { /* * Ok, user-space is still behind. * So just refresh the timer. @@ -972,8 +970,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, } } -static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, - struct tpacket_block_desc *pbd) +static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd) { return TP_STATUS_USER & BLOCK_STATUS(pbd); } @@ -1064,7 +1061,7 @@ static void *__packet_lookup_frame_in_block(struct packet_sock *po, * Check if that last block which caused the queue to freeze, * is still in_use by user-space. */ - if (prb_curr_blk_in_use(pkc, pbd)) { + if (prb_curr_blk_in_use(pbd)) { /* Can't record this packet */ return NULL; } else { diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index a9a8c7d5a4a9..c7a5d861906b 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -111,6 +111,9 @@ struct qrtr_node { struct list_head item; }; +static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb); +static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb); + /* Release node resources and free the node. * * Do not call directly, use qrtr_node_release. To be used with @@ -245,14 +248,11 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) } EXPORT_SYMBOL_GPL(qrtr_endpoint_post); -/* Allocate and construct a resume-tx packet. */ -static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, - u32 dst_node, u32 port) +static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len, + u32 src_node, u32 dst_node) { - const int pkt_len = 20; struct qrtr_hdr *hdr; struct sk_buff *skb; - __le32 *buf; skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); if (!skb) @@ -261,7 +261,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE); hdr->version = cpu_to_le32(QRTR_PROTO_VER); - hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX); + hdr->type = cpu_to_le32(type); hdr->src_node_id = cpu_to_le32(src_node); hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); hdr->confirm_rx = cpu_to_le32(0); @@ -269,6 +269,22 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, hdr->dst_node_id = cpu_to_le32(dst_node); hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); + return skb; +} + +/* Allocate and construct a resume-tx packet. 
*/ +static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, + u32 dst_node, u32 port) +{ + const int pkt_len = 20; + struct sk_buff *skb; + __le32 *buf; + + skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len, + src_node, dst_node); + if (!skb) + return NULL; + buf = (__le32 *)skb_put(skb, pkt_len); memset(buf, 0, pkt_len); buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); @@ -278,6 +294,45 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, return skb; } +/* Allocate and construct a BYE message to signal remote termination */ +static struct sk_buff *qrtr_alloc_local_bye(u32 src_node) +{ + const int pkt_len = 20; + struct sk_buff *skb; + __le32 *buf; + + skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len, + src_node, qrtr_local_nid); + if (!skb) + return NULL; + + buf = (__le32 *)skb_put(skb, pkt_len); + memset(buf, 0, pkt_len); + buf[0] = cpu_to_le32(QRTR_TYPE_BYE); + + return skb; +} + +static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq) +{ + const int pkt_len = 20; + struct sk_buff *skb; + __le32 *buf; + + skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len, + sq->sq_node, QRTR_NODE_BCAST); + if (!skb) + return NULL; + + buf = (__le32 *)skb_put(skb, pkt_len); + memset(buf, 0, pkt_len); + buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); + buf[1] = cpu_to_le32(sq->sq_node); + buf[2] = cpu_to_le32(sq->sq_port); + + return skb; +} + static struct qrtr_sock *qrtr_port_lookup(int port); static void qrtr_port_put(struct qrtr_sock *ipc); @@ -369,11 +424,17 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register); void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) { struct qrtr_node *node = ep->node; + struct sk_buff *skb; mutex_lock(&node->ep_lock); node->ep = NULL; mutex_unlock(&node->ep_lock); + /* Notify the local controller about the event */ + skb = qrtr_alloc_local_bye(node->nid); + if (skb) + qrtr_local_enqueue(NULL, skb); + qrtr_node_release(node); ep->node = NULL; } @@ -408,8 +469,15 @@ static void qrtr_port_put(struct qrtr_sock *ipc) /* Remove port assignment. */ static void qrtr_port_remove(struct qrtr_sock *ipc) { + struct sk_buff *skb; int port = ipc->us.sq_port; + skb = qrtr_alloc_del_client(&ipc->us); + if (skb) { + skb_set_owner_w(skb, &ipc->sk); + qrtr_bcast_enqueue(NULL, skb); + } + if (port == QRTR_PORT_CTRL) port = 0; @@ -462,6 +530,26 @@ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) return 0; } +/* Reset all non-control ports */ +static void qrtr_reset_ports(void) +{ + struct qrtr_sock *ipc; + int id; + + mutex_lock(&qrtr_port_lock); + idr_for_each_entry(&qrtr_ports, ipc, id) { + /* Don't reset control port */ + if (id == 0) + continue; + + sock_hold(&ipc->sk); + ipc->sk.sk_err = ENETRESET; + wake_up_interruptible(sk_sleep(&ipc->sk)); + sock_put(&ipc->sk); + } + mutex_unlock(&qrtr_port_lock); +} + /* Bind socket to address. * * Socket should be locked upon call. 
@@ -490,6 +578,10 @@ static int __qrtr_bind(struct socket *sock, sock_reset_flag(sk, SOCK_ZAPPED); + /* Notify all open ports about the new controller */ + if (port == QRTR_PORT_CTRL) + qrtr_reset_ports(); + return 0; } diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 76c01cbd56e3..41bd496531d4 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -81,8 +81,7 @@ static int rfkill_gpio_acpi_probe(struct device *dev, rfkill->type = (unsigned)id->driver_data; - return acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), - acpi_rfkill_default_gpios); + return devm_acpi_dev_add_driver_gpios(dev, acpi_rfkill_default_gpios); } static int rfkill_gpio_probe(struct platform_device *pdev) @@ -154,8 +153,6 @@ static int rfkill_gpio_remove(struct platform_device *pdev) rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); - acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev)); - return 0; } diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index b9da4d6b914f..9c68d2f8ba39 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -19,6 +19,7 @@ rxrpc-y := \ local_event.o \ local_object.o \ misc.o \ + net_ns.o \ output.o \ peer_event.o \ peer_object.o \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7fb59c3f1542..58ae0db52ea1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -38,9 +38,6 @@ MODULE_PARM_DESC(debug, "RxRPC debugging mask"); static struct proto rxrpc_proto; static const struct proto_ops rxrpc_rpc_ops; -/* local epoch for detecting local-end reset */ -u32 rxrpc_epoch; - /* current debugging ID */ atomic_t rxrpc_debug_id; @@ -134,9 +131,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) { struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; - struct sock *sk = sock->sk; struct rxrpc_local *local; - struct rxrpc_sock *rx = rxrpc_sk(sk); + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); u16 service_id = srx->srx_service; int ret; @@ -148,31 +144,48 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) lock_sock(&rx->sk); - if (rx->sk.sk_state != RXRPC_UNBOUND) { - ret = -EINVAL; - goto error_unlock; - } - - memcpy(&rx->srx, srx, sizeof(rx->srx)); + switch (rx->sk.sk_state) { + case RXRPC_UNBOUND: + rx->srx = *srx; + local = rxrpc_lookup_local(sock_net(&rx->sk), &rx->srx); + if (IS_ERR(local)) { + ret = PTR_ERR(local); + goto error_unlock; + } - local = rxrpc_lookup_local(&rx->srx); - if (IS_ERR(local)) { - ret = PTR_ERR(local); - goto error_unlock; - } + if (service_id) { + write_lock(&local->services_lock); + if (rcu_access_pointer(local->service)) + goto service_in_use; + rx->local = local; + rcu_assign_pointer(local->service, rx); + write_unlock(&local->services_lock); + + rx->sk.sk_state = RXRPC_SERVER_BOUND; + } else { + rx->local = local; + rx->sk.sk_state = RXRPC_CLIENT_BOUND; + } + break; - if (service_id) { - write_lock(&local->services_lock); - if (rcu_access_pointer(local->service)) - goto service_in_use; - rx->local = local; - rcu_assign_pointer(local->service, rx); - write_unlock(&local->services_lock); + case RXRPC_SERVER_BOUND: + ret = -EINVAL; + if (service_id == 0) + goto error_unlock; + ret = -EADDRINUSE; + if (service_id == rx->srx.srx_service) + goto error_unlock; + ret = -EINVAL; + srx->srx_service = rx->srx.srx_service; + if (memcmp(srx, &rx->srx, sizeof(*srx)) != 0) + goto error_unlock; + rx->second_service = service_id; + rx->sk.sk_state = RXRPC_SERVER_BOUND2; + 
break; - rx->sk.sk_state = RXRPC_SERVER_BOUND; - } else { - rx->local = local; - rx->sk.sk_state = RXRPC_CLIENT_BOUND; + default: + ret = -EINVAL; + goto error_unlock; } release_sock(&rx->sk); @@ -209,6 +222,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) ret = -EADDRNOTAVAIL; break; case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: ASSERT(rx->local != NULL); max = READ_ONCE(rxrpc_max_backlog); ret = -EINVAL; @@ -248,6 +262,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @srx: The address of the peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use + * @tx_total_len: Total length of data to transmit during the call (or -1) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue * @@ -262,6 +277,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct sockaddr_rxrpc *srx, struct key *key, unsigned long user_call_ID, + s64 tx_total_len, gfp_t gfp, rxrpc_notify_rx_t notify_rx) { @@ -289,7 +305,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.security_level = 0; cp.exclusive = false; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, + gfp); /* The socket has been unlocked. */ if (!IS_ERR(call)) call->notify_rx = notify_rx; @@ -434,7 +451,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) ret = -EAFNOSUPPORT; goto error_unlock; } - local = rxrpc_lookup_local(&rx->srx); + local = rxrpc_lookup_local(sock_net(sock->sk), &rx->srx); if (IS_ERR(local)) { ret = PTR_ERR(local); goto error_unlock; @@ -476,6 +493,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); unsigned int min_sec_level; + u16 service_upgrade[2]; int ret; _enter(",%d,%d,,%d", level, optname, optlen); @@ -532,6 +550,28 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, rx->min_sec_level = min_sec_level; goto success; + case RXRPC_UPGRADEABLE_SERVICE: + ret = -EINVAL; + if (optlen != sizeof(service_upgrade) || + rx->service_upgrade.from != 0) + goto error; + ret = -EISCONN; + if (rx->sk.sk_state != RXRPC_SERVER_BOUND2) + goto error; + ret = -EFAULT; + if (copy_from_user(service_upgrade, optval, + sizeof(service_upgrade)) != 0) + goto error; + ret = -EINVAL; + if ((service_upgrade[0] != rx->srx.srx_service || + service_upgrade[1] != rx->second_service) && + (service_upgrade[0] != rx->second_service || + service_upgrade[1] != rx->srx.srx_service)) + goto error; + rx->service_upgrade.from = service_upgrade[0]; + rx->service_upgrade.to = service_upgrade[1]; + goto success; + default: break; } @@ -545,6 +585,34 @@ error: } /* + * Get socket options. 
+ */ +static int rxrpc_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *_optlen) +{ + int optlen; + + if (level != SOL_RXRPC) + return -EOPNOTSUPP; + + if (get_user(optlen, _optlen)) + return -EFAULT; + + switch (optname) { + case RXRPC_SUPPORTED_CMSG: + if (optlen < sizeof(int)) + return -ETOOSMALL; + if (put_user(RXRPC__SUPPORTED - 1, (int __user *)optval) || + put_user(sizeof(int), _optlen)) + return -EFAULT; + return 0; + + default: + return -EOPNOTSUPP; + } +} + +/* * permit an RxRPC socket to be polled */ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, @@ -582,9 +650,6 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, _enter("%p,%d", sock, protocol); - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - /* we support transport protocol UDP/UDP6 only */ if (protocol != PF_INET && IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6) @@ -750,7 +815,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .listen = rxrpc_listen, .shutdown = rxrpc_shutdown, .setsockopt = rxrpc_setsockopt, - .getsockopt = sock_no_getsockopt, + .getsockopt = rxrpc_getsockopt, .sendmsg = rxrpc_sendmsg, .recvmsg = rxrpc_recvmsg, .mmap = sock_no_mmap, @@ -780,8 +845,6 @@ static int __init af_rxrpc_init(void) BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); - get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch)); - rxrpc_epoch |= RXRPC_RANDOM_EPOCH; get_random_bytes(&tmp, sizeof(tmp)); tmp &= 0x3fffffff; if (tmp == 0) @@ -809,6 +872,10 @@ static int __init af_rxrpc_init(void) goto error_security; } + ret = register_pernet_subsys(&rxrpc_net_ops); + if (ret) + goto error_pernet; + ret = proto_register(&rxrpc_proto, 1); if (ret < 0) { pr_crit("Cannot register protocol\n"); @@ -839,11 +906,6 @@ static int __init af_rxrpc_init(void) goto error_sysctls; } -#ifdef CONFIG_PROC_FS - proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops); - proc_create("rxrpc_conns", 0, init_net.proc_net, - &rxrpc_connection_seq_fops); -#endif return 0; error_sysctls: @@ -855,6 +917,8 @@ error_key_type: error_sock: proto_unregister(&rxrpc_proto); error_proto: + unregister_pernet_subsys(&rxrpc_net_ops); +error_pernet: rxrpc_exit_security(); error_security: destroy_workqueue(rxrpc_workqueue); @@ -875,14 +939,16 @@ static void __exit af_rxrpc_exit(void) unregister_key_type(&key_type_rxrpc); sock_unregister(PF_RXRPC); proto_unregister(&rxrpc_proto); - rxrpc_destroy_all_calls(); - rxrpc_destroy_all_connections(); + unregister_pernet_subsys(&rxrpc_net_ops); ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); - rxrpc_destroy_all_locals(); - remove_proc_entry("rxrpc_conns", init_net.proc_net); - remove_proc_entry("rxrpc_calls", init_net.proc_net); + /* Make sure the local and peer records pinned by any dying connections + * are released. 
+ */ + rcu_barrier(); + rxrpc_destroy_client_conn_ids(); + destroy_workqueue(rxrpc_workqueue); rxrpc_exit_security(); kmem_cache_destroy(rxrpc_call_jar); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7486926e60a8..69b97339ff9d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -11,6 +11,8 @@ #include <linux/atomic.h> #include <linux/seqlock.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <rxrpc/packet.h> @@ -59,12 +61,44 @@ enum { RXRPC_CLIENT_UNBOUND, /* Unbound socket used as client */ RXRPC_CLIENT_BOUND, /* client local address bound */ RXRPC_SERVER_BOUND, /* server local address bound */ + RXRPC_SERVER_BOUND2, /* second server local address bound */ RXRPC_SERVER_LISTENING, /* server listening for connections */ RXRPC_SERVER_LISTEN_DISABLED, /* server listening disabled */ RXRPC_CLOSE, /* socket is being closed */ }; /* + * Per-network namespace data. + */ +struct rxrpc_net { + struct proc_dir_entry *proc_net; /* Subdir in /proc/net */ + u32 epoch; /* Local epoch for detecting local-end reset */ + struct list_head calls; /* List of calls active in this namespace */ + rwlock_t call_lock; /* Lock for ->calls */ + + struct list_head conn_proc_list; /* List of conns in this namespace for proc */ + struct list_head service_conns; /* Service conns in this namespace */ + rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ + struct delayed_work service_conn_reaper; + + unsigned int nr_client_conns; + unsigned int nr_active_client_conns; + bool kill_all_client_conns; + spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ + spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ + struct list_head waiting_client_conns; + struct list_head active_client_conns; + struct list_head idle_client_conns; + struct delayed_work client_conn_reaper; + + struct list_head local_endpoints; + struct mutex local_mutex; /* Lock for ->local_endpoints */ + + spinlock_t peer_hash_lock; /* Lock for ->peer_hash */ + DECLARE_HASHTABLE (peer_hash, 10); +}; + +/* * Service backlog preallocation. 
* * This contains circular buffers of preallocated peers, connections and calls @@ -109,8 +143,14 @@ struct rxrpc_sock { u32 min_sec_level; /* minimum security level */ #define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT bool exclusive; /* Exclusive connection for a client socket */ + u16 second_service; /* Additional service bound to the endpoint */ + struct { + /* Service upgrade information */ + u16 from; /* Service ID to upgrade (if not 0) */ + u16 to; /* service ID to upgrade to */ + } service_upgrade; sa_family_t family; /* Protocol family created with */ - struct sockaddr_rxrpc srx; /* local address */ + struct sockaddr_rxrpc srx; /* Primary Service/local addresses */ struct sockaddr_rxrpc connect_srx; /* Default client address from connect() */ }; @@ -211,6 +251,7 @@ struct rxrpc_security { struct rxrpc_local { struct rcu_head rcu; atomic_t usage; + struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; @@ -259,6 +300,8 @@ struct rxrpc_peer { u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ u8 rtt_cursor; /* next entry at which to insert */ u8 rtt_usage; /* amount of cache actually used */ + + u8 cong_cwnd; /* Congestion window size */ }; /* @@ -279,6 +322,7 @@ struct rxrpc_conn_parameters { struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ bool exclusive; /* T if conn is exclusive */ + bool upgrade; /* T if service ID can be upgraded */ u16 service_id; /* Service ID for this connection */ u32 security_level; /* Security level selected */ }; @@ -293,6 +337,7 @@ enum rxrpc_conn_flag { RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ + RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ }; /* @@ -309,6 +354,7 @@ enum rxrpc_conn_cache_state { RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */ RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */ RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ + RXRPC_CONN_CLIENT_UPGRADE, /* Conn is on active list, probing for upgrade */ RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ RXRPC_CONN__NR_CACHE_STATES @@ -352,7 +398,6 @@ struct rxrpc_connection { u32 call_counter; /* Call ID counter */ u32 last_call; /* ID of last call */ u8 last_type; /* Type of last packet */ - u16 last_service_id; union { u32 last_seq; u32 last_abort; @@ -383,6 +428,7 @@ struct rxrpc_connection { atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 security_nonce; /* response re-use preventer */ + u16 service_id; /* Service ID, possibly upgraded */ u8 size_align; /* data size alignment (for security) */ u8 security_size; /* security header size */ u8 security_ix; /* security type */ @@ -484,6 +530,7 @@ struct rxrpc_call { struct rb_node sock_node; /* Node in rx->calls */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ + s64 tx_total_len; /* Total length left to be transmitted (or -1) */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ unsigned long flags; @@ -601,7 +648,6 @@ struct rxrpc_ack_summary { * af_rxrpc.c */ extern atomic_t 
rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; -extern u32 rxrpc_epoch; extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; @@ -634,15 +680,13 @@ extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; -extern struct list_head rxrpc_calls; -extern rwlock_t rxrpc_call_lock; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); struct rxrpc_call *rxrpc_alloc_call(gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - unsigned long, gfp_t); + unsigned long, s64, gfp_t); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); @@ -653,7 +697,7 @@ void rxrpc_see_call(struct rxrpc_call *); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); -void __exit rxrpc_destroy_all_calls(void); +void rxrpc_destroy_all_calls(struct rxrpc_net *); static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) { @@ -773,7 +817,8 @@ int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); -void __exit rxrpc_destroy_all_client_connections(void); +void rxrpc_discard_expired_client_conns(struct work_struct *); +void rxrpc_destroy_all_client_connections(struct rxrpc_net *); /* * conn_event.c @@ -784,9 +829,6 @@ void rxrpc_process_connection(struct work_struct *); * conn_object.c */ extern unsigned int rxrpc_connection_expiry; -extern struct list_head rxrpc_connections; -extern struct list_head rxrpc_connection_proc_list; -extern rwlock_t rxrpc_connection_lock; int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); @@ -800,7 +842,8 @@ void rxrpc_see_connection(struct rxrpc_connection *); void rxrpc_get_connection(struct rxrpc_connection *); struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); void rxrpc_put_service_conn(struct rxrpc_connection *); -void __exit rxrpc_destroy_all_connections(void); +void rxrpc_service_connection_reaper(struct work_struct *); +void rxrpc_destroy_all_connections(struct rxrpc_net *); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) { @@ -828,8 +871,9 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn) */ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); -struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); -void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *); +struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t); +void rxrpc_new_incoming_connection(struct rxrpc_sock *, + struct rxrpc_connection *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* @@ -861,9 +905,9 @@ extern void rxrpc_process_local_events(struct rxrpc_local *); /* * local_object.c */ -struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *); +struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *); void __rxrpc_put_local(struct 
rxrpc_local *); -void __exit rxrpc_destroy_all_locals(void); +void rxrpc_destroy_all_locals(struct rxrpc_net *); static inline void rxrpc_get_local(struct rxrpc_local *local) { @@ -902,6 +946,17 @@ extern unsigned int rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; /* + * net_ns.c + */ +extern unsigned int rxrpc_net_id; +extern struct pernet_operations rxrpc_net_ops; + +static inline struct rxrpc_net *rxrpc_net(struct net *net) +{ + return net_generic(net, rxrpc_net_id); +} + +/* * output.c */ int rxrpc_send_ack_packet(struct rxrpc_call *, bool); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 1752fcf8e8f1..dd30d74824b0 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -38,6 +38,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, { const void *here = __builtin_return_address(0); struct rxrpc_call *call; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); int max, tmp; unsigned int size = RXRPC_BACKLOG_MAX; unsigned int head, tail, call_head, call_tail; @@ -79,7 +80,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, if (CIRC_CNT(head, tail, size) < max) { struct rxrpc_connection *conn; - conn = rxrpc_prealloc_service_connection(gfp); + conn = rxrpc_prealloc_service_connection(rxnet, gfp); if (!conn) return -ENOMEM; b->conn_backlog[head] = conn; @@ -136,9 +137,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); - write_lock(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock(&rxrpc_call_lock); + write_lock(&rxnet->call_lock); + list_add_tail(&call->link, &rxnet->calls); + write_unlock(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); @@ -185,6 +186,7 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) void rxrpc_discard_prealloc(struct rxrpc_sock *rx) { struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); unsigned int size = RXRPC_BACKLOG_MAX, head, tail; if (!b) @@ -209,10 +211,10 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->conn_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_connection *conn = b->conn_backlog[tail]; - write_lock(&rxrpc_connection_lock); + write_lock(&rxnet->conn_lock); list_del(&conn->link); list_del(&conn->proc_link); - write_unlock(&rxrpc_connection_lock); + write_unlock(&rxnet->conn_lock); kfree(conn); tail = (tail + 1) & (size - 1); } @@ -294,7 +296,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, conn->params.local = local; conn->params.peer = peer; rxrpc_see_connection(conn); - rxrpc_new_incoming_connection(conn, skb); + rxrpc_new_incoming_connection(rx, conn, skb); } else { rxrpc_get_connection(conn); } @@ -308,6 +310,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_see_call(call); call->conn = conn; call->peer = rxrpc_get_peer(conn->params.peer); + call->cong_cwnd = call->peer->cong_cwnd; return call; } @@ -339,7 +342,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, /* Get the socket providing the service */ rx = rcu_dereference(local->service); - if (rx && service_id == rx->srx.srx_service) + if (rx && (service_id == rx->srx.srx_service || + service_id == rx->second_service)) goto found_service; trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 
47f7f4205653..d7809a0620b4 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -44,8 +44,6 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { }; struct kmem_cache *rxrpc_call_jar; -LIST_HEAD(rxrpc_calls); -DEFINE_RWLOCK(rxrpc_call_lock); static void rxrpc_call_timer_expired(unsigned long _call) { @@ -129,6 +127,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); call->debug_id = atomic_inc_return(&rxrpc_debug_id); + call->tx_total_len = -1; memset(&call->sock_node, 0xed, sizeof(call->sock_node)); @@ -137,12 +136,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) call->tx_winsize = 16; call->rx_expect_next = 1; - if (RXRPC_TX_SMSS > 2190) - call->cong_cwnd = 2; - else if (RXRPC_TX_SMSS > 1095) - call->cong_cwnd = 3; - else - call->cong_cwnd = 4; + call->cong_cwnd = 2; call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; return call; @@ -203,10 +197,12 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, unsigned long user_call_ID, + s64 tx_total_len, gfp_t gfp) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call, *xcall; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); struct rb_node *parent, **pp; const void *here = __builtin_return_address(0); int ret; @@ -220,6 +216,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } + call->tx_total_len = tx_total_len; trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), here, (const void *)user_call_ID); @@ -255,9 +252,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); - write_lock(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock(&rxrpc_call_lock); + write_lock(&rxnet->call_lock); + list_add_tail(&call->link, &rxnet->calls); + write_unlock(&rxnet->call_lock); /* From this point on, the call is protected by its own lock. */ release_sock(&rx->sk); @@ -508,6 +505,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) */ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { + struct rxrpc_net *rxnet; const void *here = __builtin_return_address(0); int n; @@ -520,9 +518,12 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) _debug("call %d dead", call->debug_id); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - write_lock(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock(&rxrpc_call_lock); + if (!list_empty(&call->link)) { + rxnet = rxrpc_net(sock_net(&call->socket->sk)); + write_lock(&rxnet->call_lock); + list_del_init(&call->link); + write_unlock(&rxnet->call_lock); + } rxrpc_cleanup_call(call); } @@ -570,21 +571,23 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) } /* - * Make sure that all calls are gone. + * Make sure that all calls are gone from a network namespace. To reach this + * point, any open UDP sockets in that namespace must have been closed, so any + * outstanding calls cannot be doing I/O. 
*/ -void __exit rxrpc_destroy_all_calls(void) +void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) { struct rxrpc_call *call; _enter(""); - if (list_empty(&rxrpc_calls)) + if (list_empty(&rxnet->calls)) return; - write_lock(&rxrpc_call_lock); + write_lock(&rxnet->call_lock); - while (!list_empty(&rxrpc_calls)) { - call = list_entry(rxrpc_calls.next, struct rxrpc_call, link); + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, struct rxrpc_call, link); _debug("Zapping call %p", call); rxrpc_see_call(call); @@ -595,10 +598,10 @@ void __exit rxrpc_destroy_all_calls(void) rxrpc_call_states[call->state], call->flags, call->events); - write_unlock(&rxrpc_call_lock); + write_unlock(&rxnet->call_lock); cond_resched(); - write_lock(&rxrpc_call_lock); + write_lock(&rxnet->call_lock); } - write_unlock(&rxrpc_call_lock); + write_unlock(&rxnet->call_lock); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index e8dea0d49e7f..eb2157680399 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -31,22 +31,25 @@ * may freely grant available channels to new calls and calls may be * waiting on it for channels to become available. * - * The connection is on the rxrpc_active_client_conns list which is kept + * The connection is on the rxnet->active_client_conns list which is kept * in activation order for culling purposes. * * rxrpc_nr_active_client_conns is held incremented also. * - * (4) CULLED - The connection got summarily culled to try and free up + * (4) UPGRADE - As for ACTIVE, but only one call may be in progress and is + * being used to probe for service upgrade. + * + * (5) CULLED - The connection got summarily culled to try and free up * capacity. Calls currently in progress on the connection are allowed to * continue, but new calls will have to wait. There can be no waiters in * this state - the conn would have to go to the WAITING state instead. * - * (5) IDLE - The connection has no calls in progress upon it and must have + * (6) IDLE - The connection has no calls in progress upon it and must have * been exposed to the world (ie. the EXPOSED flag must be set). When it * expires, the EXPOSED flag is cleared and the connection transitions to * the INACTIVE state. * - * The connection is on the rxrpc_idle_client_conns list which is kept in + * The connection is on the rxnet->idle_client_conns list which is kept in * order of how soon they'll expire. * * There are flags of relevance to the cache: @@ -85,27 +88,13 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900; __read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; __read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; -static unsigned int rxrpc_nr_client_conns; -static unsigned int rxrpc_nr_active_client_conns; -static __read_mostly bool rxrpc_kill_all_client_conns; - -static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock); -static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex); -static LIST_HEAD(rxrpc_waiting_client_conns); -static LIST_HEAD(rxrpc_active_client_conns); -static LIST_HEAD(rxrpc_idle_client_conns); - /* * We use machine-unique IDs for our client connections. 
*/ DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); -static void rxrpc_cull_active_client_conns(void); -static void rxrpc_discard_expired_client_conns(struct work_struct *); - -static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, - rxrpc_discard_expired_client_conns); +static void rxrpc_cull_active_client_conns(struct rxrpc_net *); /* * Get a connection ID and epoch for a client connection from the global pool. @@ -116,6 +105,7 @@ static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) { + struct rxrpc_net *rxnet = conn->params.local->rxnet; int id; _enter(""); @@ -131,7 +121,7 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, spin_unlock(&rxrpc_conn_id_lock); idr_preload_end(); - conn->proto.epoch = rxrpc_epoch; + conn->proto.epoch = rxnet->epoch; conn->proto.cid = id << RXRPC_CIDSHIFT; set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); _leave(" [CID %x]", conn->proto.cid); @@ -183,6 +173,7 @@ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) { struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = cp->local->rxnet; int ret; _enter(""); @@ -196,10 +187,13 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) atomic_set(&conn->usage, 1); if (cp->exclusive) __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + if (cp->upgrade) + __set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); conn->params = *cp; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; + conn->service_id = cp->service_id; ret = rxrpc_get_client_connection_id(conn, gfp); if (ret < 0) @@ -213,9 +207,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) if (ret < 0) goto error_2; - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); - write_unlock(&rxrpc_connection_lock); + write_lock(&rxnet->conn_lock); + list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); + write_unlock(&rxnet->conn_lock); /* We steal the caller's peer ref. */ cp->peer = NULL; @@ -243,12 +237,13 @@ error_0: */ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) { + struct rxrpc_net *rxnet = conn->params.local->rxnet; int id_cursor, id, distance, limit; if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; - if (conn->proto.epoch != rxrpc_epoch) + if (conn->proto.epoch != rxnet->epoch) goto mark_dont_reuse; /* The IDR tree gets very expensive on memory if the connection IDs are @@ -297,6 +292,12 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, if (!cp->peer) goto error; + call->cong_cwnd = cp->peer->cong_cwnd; + if (call->cong_cwnd >= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + else + call->cong_mode = RXRPC_CALL_SLOW_START; + /* If the connection is not meant to be exclusive, search the available * connections to see if the connection we want to use already exists. 
*/ @@ -310,7 +311,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, #define cmp(X) ((long)conn->params.X - (long)cp->X) diff = (cmp(peer) ?: cmp(key) ?: - cmp(security_level)); + cmp(security_level) ?: + cmp(upgrade)); #undef cmp if (diff < 0) { p = p->rb_left; @@ -354,6 +356,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, if (cp->exclusive) { call->conn = candidate; call->security_ix = candidate->security_ix; + call->service_id = candidate->service_id; _leave(" = 0 [exclusive %d]", candidate->debug_id); return 0; } @@ -374,7 +377,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, #define cmp(X) ((long)conn->params.X - (long)candidate->params.X) diff = (cmp(peer) ?: cmp(key) ?: - cmp(security_level)); + cmp(security_level) ?: + cmp(upgrade)); #undef cmp if (diff < 0) { pp = &(*pp)->rb_left; @@ -403,6 +407,7 @@ candidate_published: set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); call->conn = candidate; call->security_ix = candidate->security_ix; + call->service_id = candidate->service_id; spin_unlock(&local->client_conns_lock); _leave(" = 0 [new %d]", candidate->debug_id); return 0; @@ -424,6 +429,7 @@ found_extant_conn: spin_lock(&conn->channel_lock); call->conn = conn; call->security_ix = conn->security_ix; + call->service_id = conn->service_id; list_add(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); _leave(" = 0 [extant %d]", conn->debug_id); @@ -440,12 +446,18 @@ error: /* * Activate a connection. */ -static void rxrpc_activate_conn(struct rxrpc_connection *conn) +static void rxrpc_activate_conn(struct rxrpc_net *rxnet, + struct rxrpc_connection *conn) { - trace_rxrpc_client(conn, -1, rxrpc_client_to_active); - conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; - rxrpc_nr_active_client_conns++; - list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); + if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_upgrade); + conn->cache_state = RXRPC_CONN_CLIENT_UPGRADE; + } else { + trace_rxrpc_client(conn, -1, rxrpc_client_to_active); + conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; + } + rxnet->nr_active_client_conns++; + list_move_tail(&conn->cache_link, &rxnet->active_client_conns); } /* @@ -460,25 +472,28 @@ static void rxrpc_activate_conn(struct rxrpc_connection *conn) * channels if it has been culled to make space and then re-requested by a new * call. 
*/ -static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) +static void rxrpc_animate_client_conn(struct rxrpc_net *rxnet, + struct rxrpc_connection *conn) { unsigned int nr_conns; _enter("%d,%d", conn->debug_id, conn->cache_state); - if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE) + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE || + conn->cache_state == RXRPC_CONN_CLIENT_UPGRADE) goto out; - spin_lock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); - nr_conns = rxrpc_nr_client_conns; + nr_conns = rxnet->nr_client_conns; if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) { trace_rxrpc_client(conn, -1, rxrpc_client_count); - rxrpc_nr_client_conns = nr_conns + 1; + rxnet->nr_client_conns = nr_conns + 1; } switch (conn->cache_state) { case RXRPC_CONN_CLIENT_ACTIVE: + case RXRPC_CONN_CLIENT_UPGRADE: case RXRPC_CONN_CLIENT_WAITING: break; @@ -494,21 +509,21 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) } out_unlock: - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_cache_lock); out: _leave(" [%d]", conn->cache_state); return; activate_conn: _debug("activate"); - rxrpc_activate_conn(conn); + rxrpc_activate_conn(rxnet, conn); goto out_unlock; wait_for_capacity: _debug("wait"); trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; - list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); + list_move_tail(&conn->cache_link, &rxnet->waiting_client_conns); goto out_unlock; } @@ -582,6 +597,9 @@ static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn) case RXRPC_CONN_CLIENT_ACTIVE: mask = RXRPC_ACTIVE_CHANS_MASK; break; + case RXRPC_CONN_CLIENT_UPGRADE: + mask = 0x01; + break; default: return; } @@ -660,18 +678,19 @@ int rxrpc_connect_call(struct rxrpc_call *call, struct sockaddr_rxrpc *srx, gfp_t gfp) { + struct rxrpc_net *rxnet = cp->local->rxnet; int ret; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - rxrpc_discard_expired_client_conns(NULL); - rxrpc_cull_active_client_conns(); + rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work); + rxrpc_cull_active_client_conns(rxnet); ret = rxrpc_get_client_conn(call, cp, srx, gfp); if (ret < 0) return ret; - rxrpc_animate_client_conn(call->conn); + rxrpc_animate_client_conn(rxnet, call->conn); rxrpc_activate_channels(call->conn); ret = rxrpc_wait_for_channel(call, gfp); @@ -729,6 +748,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk)); trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); call->conn = NULL; @@ -750,7 +770,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) /* We must deactivate or idle the connection if it's now * waiting for nothing. */ - spin_lock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING && list_empty(&conn->waiting_calls) && !conn->active_chans) @@ -787,14 +807,23 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) * list. It might even get moved back to the active list whilst we're * waiting for the lock. 
*/ - spin_lock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_UPGRADE: + /* Deal with termination of a service upgrade probe. */ + if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); + trace_rxrpc_client(conn, channel, rxrpc_client_to_active); + conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; + rxrpc_activate_channels_locked(conn); + } + /* fall through */ case RXRPC_CONN_CLIENT_ACTIVE: if (list_empty(&conn->waiting_calls)) { rxrpc_deactivate_one_channel(conn, channel); if (!conn->active_chans) { - rxrpc_nr_active_client_conns--; + rxnet->nr_active_client_conns--; goto idle_connection; } goto out; @@ -820,7 +849,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) } out: - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_cache_lock); out_2: spin_unlock(&conn->channel_lock); rxrpc_put_connection(conn); @@ -835,11 +864,11 @@ idle_connection: trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); conn->idle_timestamp = jiffies; conn->cache_state = RXRPC_CONN_CLIENT_IDLE; - list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); - if (rxrpc_idle_client_conns.next == &conn->cache_link && - !rxrpc_kill_all_client_conns) + list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); + if (rxnet->idle_client_conns.next == &conn->cache_link && + !rxnet->kill_all_client_conns) queue_delayed_work(rxrpc_workqueue, - &rxrpc_client_conn_reap, + &rxnet->client_conn_reaper, rxrpc_conn_idle_client_expiry); } else { trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); @@ -857,6 +886,7 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) { struct rxrpc_connection *next = NULL; struct rxrpc_local *local = conn->params.local; + struct rxrpc_net *rxnet = local->rxnet; unsigned int nr_conns; trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); @@ -875,18 +905,18 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { trace_rxrpc_client(conn, -1, rxrpc_client_uncount); - spin_lock(&rxrpc_client_conn_cache_lock); - nr_conns = --rxrpc_nr_client_conns; + spin_lock(&rxnet->client_conn_cache_lock); + nr_conns = --rxnet->nr_client_conns; if (nr_conns < rxrpc_max_client_connections && - !list_empty(&rxrpc_waiting_client_conns)) { - next = list_entry(rxrpc_waiting_client_conns.next, + !list_empty(&rxnet->waiting_client_conns)) { + next = list_entry(rxnet->waiting_client_conns.next, struct rxrpc_connection, cache_link); rxrpc_get_connection(next); - rxrpc_activate_conn(next); + rxrpc_activate_conn(rxnet, next); } - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_cache_lock); } rxrpc_kill_connection(conn); @@ -921,10 +951,10 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn) /* * Kill the longest-active client connections to make room for new ones. 
*/ -static void rxrpc_cull_active_client_conns(void) +static void rxrpc_cull_active_client_conns(struct rxrpc_net *rxnet) { struct rxrpc_connection *conn; - unsigned int nr_conns = rxrpc_nr_client_conns; + unsigned int nr_conns = rxnet->nr_client_conns; unsigned int nr_active, limit; _enter(""); @@ -936,14 +966,15 @@ static void rxrpc_cull_active_client_conns(void) } limit = rxrpc_reap_client_connections; - spin_lock(&rxrpc_client_conn_cache_lock); - nr_active = rxrpc_nr_active_client_conns; + spin_lock(&rxnet->client_conn_cache_lock); + nr_active = rxnet->nr_active_client_conns; while (nr_active > limit) { - ASSERT(!list_empty(&rxrpc_active_client_conns)); - conn = list_entry(rxrpc_active_client_conns.next, + ASSERT(!list_empty(&rxnet->active_client_conns)); + conn = list_entry(rxnet->active_client_conns.next, struct rxrpc_connection, cache_link); - ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE); + ASSERTIFCMP(conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE, + conn->cache_state, ==, RXRPC_CONN_CLIENT_UPGRADE); if (list_empty(&conn->waiting_calls)) { trace_rxrpc_client(conn, -1, rxrpc_client_to_culled); @@ -953,14 +984,14 @@ static void rxrpc_cull_active_client_conns(void) trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; list_move_tail(&conn->cache_link, - &rxrpc_waiting_client_conns); + &rxnet->waiting_client_conns); } nr_active--; } - rxrpc_nr_active_client_conns = nr_active; - spin_unlock(&rxrpc_client_conn_cache_lock); + rxnet->nr_active_client_conns = nr_active; + spin_unlock(&rxnet->client_conn_cache_lock); ASSERTCMP(nr_active, >=, 0); _leave(" [culled]"); } @@ -972,22 +1003,25 @@ static void rxrpc_cull_active_client_conns(void) * This may be called from conn setup or from a work item so cannot be * considered non-reentrant. */ -static void rxrpc_discard_expired_client_conns(struct work_struct *work) +void rxrpc_discard_expired_client_conns(struct work_struct *work) { struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = + container_of(to_delayed_work(work), + struct rxrpc_net, client_conn_reaper); unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; bool did_discard = false; - _enter("%c", work ? 'w' : 'n'); + _enter(""); - if (list_empty(&rxrpc_idle_client_conns)) { + if (list_empty(&rxnet->idle_client_conns)) { _leave(" [empty]"); return; } /* Don't double up on the discarding */ - if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) { + if (!spin_trylock(&rxnet->client_conn_discard_lock)) { _leave(" [already]"); return; } @@ -995,19 +1029,19 @@ static void rxrpc_discard_expired_client_conns(struct work_struct *work) /* We keep an estimate of what the number of conns ought to be after * we've discarded some so that we don't overdo the discarding. */ - nr_conns = rxrpc_nr_client_conns; + nr_conns = rxnet->nr_client_conns; next: - spin_lock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); - if (list_empty(&rxrpc_idle_client_conns)) + if (list_empty(&rxnet->idle_client_conns)) goto out; - conn = list_entry(rxrpc_idle_client_conns.next, + conn = list_entry(rxnet->idle_client_conns.next, struct rxrpc_connection, cache_link); ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags)); - if (!rxrpc_kill_all_client_conns) { + if (!rxnet->kill_all_client_conns) { /* If the number of connections is over the reap limit, we * expedite discard by reducing the expiry timeout. 
We must, * however, have at least a short grace period to be able to do @@ -1030,7 +1064,7 @@ next: conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; list_del_init(&conn->cache_link); - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_cache_lock); /* When we cleared the EXPOSED flag, we took on responsibility for the * reference that that had on the usage count. We deal with that here. @@ -1050,14 +1084,14 @@ not_yet_expired: * then things get messier. */ _debug("not yet"); - if (!rxrpc_kill_all_client_conns) + if (!rxnet->kill_all_client_conns) queue_delayed_work(rxrpc_workqueue, - &rxrpc_client_conn_reap, + &rxnet->client_conn_reaper, conn_expires_at - now); out: - spin_unlock(&rxrpc_client_conn_cache_lock); - spin_unlock(&rxrpc_client_conn_discard_mutex); + spin_unlock(&rxnet->client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_discard_lock); _leave(""); } @@ -1065,17 +1099,17 @@ out: * Preemptively destroy all the client connection records rather than waiting * for them to time out */ -void __exit rxrpc_destroy_all_client_connections(void) +void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) { _enter(""); - spin_lock(&rxrpc_client_conn_cache_lock); - rxrpc_kill_all_client_conns = true; - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); + rxnet->kill_all_client_conns = true; + spin_unlock(&rxnet->client_conn_cache_lock); - cancel_delayed_work(&rxrpc_client_conn_reap); + cancel_delayed_work(&rxnet->client_conn_reaper); - if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0)) + if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0)) _debug("destroy: queue failed"); _leave(""); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 46babcf82ce8..59a51a56e7c8 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -74,7 +74,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.whdr.userStatus = 0; pkt.whdr.securityIndex = conn->security_ix; pkt.whdr._rsvd = 0; - pkt.whdr.serviceId = htons(chan->last_service_id); + pkt.whdr.serviceId = htons(conn->service_id); len = sizeof(pkt.whdr); switch (chan->last_type) { @@ -208,7 +208,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr.userStatus = 0; whdr.securityIndex = conn->security_ix; whdr._rsvd = 0; - whdr.serviceId = htons(conn->params.service_id); + whdr.serviceId = htons(conn->service_id); word = htonl(conn->local_abort); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index b0ecb770fdce..929b50d5afe8 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -22,13 +22,6 @@ */ unsigned int rxrpc_connection_expiry = 10 * 60; -static void rxrpc_connection_reaper(struct work_struct *work); - -LIST_HEAD(rxrpc_connections); -LIST_HEAD(rxrpc_connection_proc_list); -DEFINE_RWLOCK(rxrpc_connection_lock); -static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); - static void rxrpc_destroy_connection(struct rcu_head *); /* @@ -174,7 +167,6 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, * through the channel, whilst disposing of the actual call record. 
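
Back in the reaper above, the expiry decision that the truncated comment describes sits between these fragments; reconstructed as a sketch (rxrpc_conn_idle_client_fast_expiry is the companion sysctl in this series, exact expression assumed):

	expiry = rxrpc_conn_idle_client_expiry;
	if (nr_conns > rxrpc_reap_client_connections)
		expiry = rxrpc_conn_idle_client_fast_expiry;

	conn_expires_at = conn->idle_timestamp + expiry;
	now = READ_ONCE(jiffies);
	if (time_after(conn_expires_at, now))
		goto not_yet_expired;

Discard always works from the head of the idle list, so the first unexpired connection bounds how far ahead the reaper needs rescheduling. One thing to flag while here: the service-connection reaper in the conn_object.c hunks below requeues itself via rxnet->client_conn_reaper even though net_ns.c initialises a dedicated service_conn_reaper; that reads like a slip, and this area was reworked in later mainline.
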
*/ trace_rxrpc_disconnect_call(call); - chan->last_service_id = call->service_id; if (call->abort_code) { chan->last_abort = call->abort_code; chan->last_type = RXRPC_PACKET_TYPE_ABORT; @@ -201,6 +193,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + call->peer->cong_cwnd = call->cong_cwnd; + spin_lock_bh(&conn->params.peer->lock); hlist_del_init(&call->error_link); spin_unlock_bh(&conn->params.peer->lock); @@ -222,15 +216,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) */ void rxrpc_kill_connection(struct rxrpc_connection *conn) { + struct rxrpc_net *rxnet = conn->params.local->rxnet; + ASSERT(!rcu_access_pointer(conn->channels[0].call) && !rcu_access_pointer(conn->channels[1].call) && !rcu_access_pointer(conn->channels[2].call) && !rcu_access_pointer(conn->channels[3].call)); ASSERT(list_empty(&conn->cache_link)); - write_lock(&rxrpc_connection_lock); + write_lock(&rxnet->conn_lock); list_del_init(&conn->proc_link); - write_unlock(&rxrpc_connection_lock); + write_unlock(&rxnet->conn_lock); /* Drain the Rx queue. Note that even though we've unpublished, an * incoming packet could still be being added to our Rx queue, so we @@ -309,14 +305,17 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) */ void rxrpc_put_service_conn(struct rxrpc_connection *conn) { + struct rxrpc_net *rxnet; const void *here = __builtin_return_address(0); int n; n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 0) - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + if (n == 0) { + rxnet = conn->params.local->rxnet; + rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); + } } /* @@ -348,9 +347,12 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) /* * reap dead service connections */ -static void rxrpc_connection_reaper(struct work_struct *work) +void rxrpc_service_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; + struct rxrpc_net *rxnet = + container_of(to_delayed_work(work), + struct rxrpc_net, service_conn_reaper); unsigned long reap_older_than, earliest, idle_timestamp, now; LIST_HEAD(graveyard); @@ -361,8 +363,8 @@ static void rxrpc_connection_reaper(struct work_struct *work) reap_older_than = now - rxrpc_connection_expiry * HZ; earliest = ULONG_MAX; - write_lock(&rxrpc_connection_lock); - list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) { + write_lock(&rxnet->conn_lock); + list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { ASSERTCMP(atomic_read(&conn->usage), >, 0); if (likely(atomic_read(&conn->usage) > 1)) continue; @@ -393,12 +395,12 @@ static void rxrpc_connection_reaper(struct work_struct *work) list_move_tail(&conn->link, &graveyard); } - write_unlock(&rxrpc_connection_lock); + write_unlock(&rxnet->conn_lock); if (earliest != ULONG_MAX) { _debug("reschedule reaper %ld", (long) earliest - now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxrpc_connection_reap, + rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, earliest - now); } @@ -418,36 +420,30 @@ static void rxrpc_connection_reaper(struct work_struct *work) * preemptively destroy all the service connection records rather than * waiting for them to time out */ -void __exit rxrpc_destroy_all_connections(void) +void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) { struct rxrpc_connection *conn, *_p; bool leak = false; _enter(""); - rxrpc_destroy_all_client_connections(); + 
rxrpc_destroy_all_client_connections(rxnet); rxrpc_connection_expiry = 0; - cancel_delayed_work(&rxrpc_connection_reap); - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + cancel_delayed_work(&rxnet->client_conn_reaper); + rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); flush_workqueue(rxrpc_workqueue); - write_lock(&rxrpc_connection_lock); - list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) { + write_lock(&rxnet->conn_lock); + list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { pr_err("AF_RXRPC: Leaked conn %p {%d}\n", conn, atomic_read(&conn->usage)); leak = true; } - write_unlock(&rxrpc_connection_lock); + write_unlock(&rxnet->conn_lock); BUG_ON(leak); - ASSERT(list_empty(&rxrpc_connection_proc_list)); - - /* Make sure the local and peer records pinned by any dying connections - * are released. - */ - rcu_barrier(); - rxrpc_destroy_client_conn_ids(); + ASSERT(list_empty(&rxnet->conn_proc_list)); _leave(""); } diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index eef551f40dc2..e60fcd2a4a02 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -121,7 +121,8 @@ replace_old_connection: * Preallocate a service connection. The connection is placed on the proc and * reap lists so that we don't have to get the lock from BH context. */ -struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) +struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet, + gfp_t gfp) { struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); @@ -132,10 +133,10 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) conn->state = RXRPC_CONN_SERVICE_PREALLOC; atomic_set(&conn->usage, 2); - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); - list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); - write_unlock(&rxrpc_connection_lock); + write_lock(&rxnet->conn_lock); + list_add_tail(&conn->link, &rxnet->service_conns); + list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); + write_unlock(&rxnet->conn_lock); trace_rxrpc_conn(conn, rxrpc_conn_new_service, atomic_read(&conn->usage), @@ -149,7 +150,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) * Set up an incoming connection. This is called in BH context with the RCU * read lock held. */ -void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, +void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -159,6 +161,7 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; conn->params.service_id = sp->hdr.serviceId; + conn->service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; if (conn->security_ix) @@ -166,6 +169,14 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, else conn->state = RXRPC_CONN_SERVICE; + /* See if we should upgrade the service. This can only happen on the + * first packet on a new connection. Once done, it applies to all + * subsequent calls on that connection. + */ + if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE && + conn->service_id == rx->service_upgrade.from) + conn->service_id = rx->service_upgrade.to; + /* Make the connection a target for incoming packets. 
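
rxrpc_new_incoming_connection() above is the passive half of service upgrade: if the first packet carries RXRPC_USERSTATUS_SERVICE_UPGRADE and addresses the "from" service, the whole connection is re-pointed at the "to" service. The rx->service_upgrade pair comes from userspace configuration; a hedged sketch of the server-side setup (the RXRPC_UPGRADEABLE_SERVICE sockopt belongs to the same series but is outside this excerpt, so take the exact usage as an assumption; FROM_SERVICE/TO_SERVICE are placeholders):

	unsigned int ids[2] = { FROM_SERVICE, TO_SERVICE };

	/* The socket must already be bound to both service IDs. */
	if (setsockopt(fd, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE,
		       ids, sizeof(ids)) < 0)
		perror("RXRPC_UPGRADEABLE_SERVICE");

As the comment notes, the decision is made once per connection and then sticks for every subsequent call on it.
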
*/ rxrpc_publish_service_conn(conn->params.peer, conn); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 45dba732a3b4..e56e23ed2229 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1142,6 +1142,13 @@ void rxrpc_data_ready(struct sock *udp_sk) if (sp->hdr.securityIndex != conn->security_ix) goto wrong_security; + if (sp->hdr.serviceId != conn->service_id) { + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) || + conn->service_id != conn->params.service_id) + goto reupgrade; + conn->service_id = sp->hdr.serviceId; + } + if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); @@ -1194,6 +1201,9 @@ void rxrpc_data_ready(struct sock *udp_sk) rxrpc_input_implicit_end_call(conn, call); call = NULL; } + + if (call && sp->hdr.serviceId != call->service_id) + call->service_id = sp->hdr.serviceId; } else { skew = 0; call = NULL; @@ -1237,11 +1247,18 @@ wrong_security: skb->priority = RXKADINCONSISTENCY; goto post_abort; +reupgrade: + rcu_read_unlock(); + trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); + goto protocol_error; + bad_message_unlock: rcu_read_unlock(); bad_message: trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); +protocol_error: skb->priority = RX_PROTOCOL_ERROR; post_abort: skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index ff4864d550b8..38b99db30e54 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -25,9 +25,6 @@ static void rxrpc_local_processor(struct work_struct *); static void rxrpc_local_rcu(struct rcu_head *); -static DEFINE_MUTEX(rxrpc_local_mutex); -static LIST_HEAD(rxrpc_local_endpoints); - /* * Compare a local to an address. Return -ve, 0 or +ve to indicate less than, * same or greater than. @@ -77,13 +74,15 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, /* * Allocate a new local endpoint. 
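
Local endpoints get the same treatment as connections: rxrpc_alloc_local() below records the owning rxnet, and rxrpc_lookup_local() now takes a struct net so the underlying UDP socket is opened in the caller's namespace via sock_create_kern(net, ...). The call-site pattern this implies (sketch; the actual conversions live in af_rxrpc.c outside this excerpt):

	/* bind/connect paths derive the namespace from the socket */
	local = rxrpc_lookup_local(sock_net(&rx->sk), &rx->srx);
	if (IS_ERR(local))
		return PTR_ERR(local);

Note also that the next hunk clears srx_service in the copied address, making the endpoint's lookup key independent of any particular service ID.
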
*/ -static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) +static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, + const struct sockaddr_rxrpc *srx) { struct rxrpc_local *local; local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { atomic_set(&local->usage, 1); + local->rxnet = rxnet; INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); init_rwsem(&local->defrag_sem); @@ -95,6 +94,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) rwlock_init(&local->services_lock); local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); + local->srx.srx_service = 0; } _leave(" = %p", local); @@ -105,7 +105,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) * create the local socket * - must be called with rxrpc_local_mutex locked */ -static int rxrpc_open_socket(struct rxrpc_local *local) +static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { struct sock *sock; int ret, opt; @@ -114,7 +114,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local) local, local->srx.transport_type, local->srx.transport.family); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(&init_net, local->srx.transport.family, + ret = sock_create_kern(net, local->srx.transport.family, local->srx.transport_type, 0, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); @@ -172,9 +172,11 @@ error: /* * Look up or create a new local endpoint using the specified local address. */ -struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) +struct rxrpc_local *rxrpc_lookup_local(struct net *net, + const struct sockaddr_rxrpc *srx) { struct rxrpc_local *local; + struct rxrpc_net *rxnet = rxrpc_net(net); struct list_head *cursor; const char *age; long diff; @@ -183,10 +185,10 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) _enter("{%d,%d,%pISp}", srx->transport_type, srx->transport.family, &srx->transport); - mutex_lock(&rxrpc_local_mutex); + mutex_lock(&rxnet->local_mutex); - for (cursor = rxrpc_local_endpoints.next; - cursor != &rxrpc_local_endpoints; + for (cursor = rxnet->local_endpoints.next; + cursor != &rxnet->local_endpoints; cursor = cursor->next) { local = list_entry(cursor, struct rxrpc_local, link); @@ -220,11 +222,11 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) goto found; } - local = rxrpc_alloc_local(srx); + local = rxrpc_alloc_local(rxnet, srx); if (!local) goto nomem; - ret = rxrpc_open_socket(local); + ret = rxrpc_open_socket(local, net); if (ret < 0) goto sock_error; @@ -232,7 +234,7 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) age = "new"; found: - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); _net("LOCAL %s %d {%pISp}", age, local->debug_id, &local->srx.transport); @@ -243,13 +245,13 @@ found: nomem: ret = -ENOMEM; sock_error: - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); kfree(local); _leave(" = %d", ret); return ERR_PTR(ret); addr_in_use: - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); _leave(" = -EADDRINUSE"); return ERR_PTR(-EADDRINUSE); } @@ -273,6 +275,7 @@ void __rxrpc_put_local(struct rxrpc_local *local) static void rxrpc_local_destroyer(struct rxrpc_local *local) { struct socket *socket = local->socket; + struct rxrpc_net *rxnet = local->rxnet; _enter("%d", local->debug_id); @@ -286,9 
+289,9 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) } local->dead = true; - mutex_lock(&rxrpc_local_mutex); + mutex_lock(&rxnet->local_mutex); list_del_init(&local->link); - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); ASSERT(RB_EMPTY_ROOT(&local->client_conns)); ASSERT(!local->service); @@ -357,7 +360,7 @@ static void rxrpc_local_rcu(struct rcu_head *rcu) /* * Verify the local endpoint list is empty by this point. */ -void __exit rxrpc_destroy_all_locals(void) +void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) { struct rxrpc_local *local; @@ -365,15 +368,13 @@ void __exit rxrpc_destroy_all_locals(void) flush_workqueue(rxrpc_workqueue); - if (!list_empty(&rxrpc_local_endpoints)) { - mutex_lock(&rxrpc_local_mutex); - list_for_each_entry(local, &rxrpc_local_endpoints, link) { + if (!list_empty(&rxnet->local_endpoints)) { + mutex_lock(&rxnet->local_mutex); + list_for_each_entry(local, &rxnet->local_endpoints, link) { pr_err("AF_RXRPC: Leaked local %p {%d}\n", local, atomic_read(&local->usage)); } - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); BUG(); } - - rcu_barrier(); } diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c new file mode 100644 index 000000000000..7edceb8522f5 --- /dev/null +++ b/net/rxrpc/net_ns.c @@ -0,0 +1,84 @@ +/* rxrpc network namespace handling. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/proc_fs.h> +#include "ar-internal.h" + +unsigned int rxrpc_net_id; + +/* + * Initialise a per-network namespace record. + */ +static __net_init int rxrpc_init_net(struct net *net) +{ + struct rxrpc_net *rxnet = rxrpc_net(net); + int ret; + + get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); + rxnet->epoch |= RXRPC_RANDOM_EPOCH; + + INIT_LIST_HEAD(&rxnet->calls); + rwlock_init(&rxnet->call_lock); + + INIT_LIST_HEAD(&rxnet->conn_proc_list); + INIT_LIST_HEAD(&rxnet->service_conns); + rwlock_init(&rxnet->conn_lock); + INIT_DELAYED_WORK(&rxnet->service_conn_reaper, + rxrpc_service_connection_reaper); + + rxnet->nr_client_conns = 0; + rxnet->nr_active_client_conns = 0; + rxnet->kill_all_client_conns = false; + spin_lock_init(&rxnet->client_conn_cache_lock); + spin_lock_init(&rxnet->client_conn_discard_lock); + INIT_LIST_HEAD(&rxnet->waiting_client_conns); + INIT_LIST_HEAD(&rxnet->active_client_conns); + INIT_LIST_HEAD(&rxnet->idle_client_conns); + INIT_DELAYED_WORK(&rxnet->client_conn_reaper, + rxrpc_discard_expired_client_conns); + + INIT_LIST_HEAD(&rxnet->local_endpoints); + mutex_init(&rxnet->local_mutex); + hash_init(rxnet->peer_hash); + spin_lock_init(&rxnet->peer_hash_lock); + + ret = -ENOMEM; + rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net); + if (!rxnet->proc_net) + goto err_proc; + + proc_create("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_fops); + proc_create("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_fops); + return 0; + +err_proc: + return ret; +} + +/* + * Clean up a per-network namespace record. 
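
net_ns.c above is the backbone of the whole conversion: rxrpc_init_net() gives each namespace its own epoch, call and connection lists, reaper work items and a /proc/net/rxrpc directory. For any of it to run, rxrpc_net_ops must be registered at module init; a sketch of the registration side (done in af_rxrpc.c in this series, shown here as an assumption):

	static int __init af_rxrpc_init(void)
	{
		int ret;
		/* ... */
		ret = register_pernet_subsys(&rxrpc_net_ops);
		if (ret)
			goto error_pernet;
		/* ... */
	}

	static void __exit af_rxrpc_exit(void)
	{
		/* ... */
		unregister_pernet_subsys(&rxrpc_net_ops);
	}

Because .id and .size are set, registration allocates rxrpc_net_id and the per-net storage automatically, and .init/.exit run for every namespace as it comes and goes, which is why rxrpc_exit_net() just below must drain calls, connections and locals itself rather than relying on module unload.
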
+ */ +static __net_exit void rxrpc_exit_net(struct net *net) +{ + struct rxrpc_net *rxnet = rxrpc_net(net); + + rxrpc_destroy_all_calls(rxnet); + rxrpc_destroy_all_connections(rxnet); + rxrpc_destroy_all_locals(rxnet); + proc_remove(rxnet->proc_net); +} + +struct pernet_operations rxrpc_net_ops = { + .init = rxrpc_init_net, + .exit = rxrpc_exit_net, + .id = &rxrpc_net_id, + .size = sizeof(struct rxrpc_net), +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5dab1ff3a6c2..5bd2d0fa4a03 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -292,6 +292,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, whdr._rsvd = htons(sp->hdr._rsvd); whdr.serviceId = htons(call->service_id); + if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && + sp->hdr.seq == 1) + whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; + iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); iov[1].iov_base = skb->head; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 862eea6b266c..5787f97f5330 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -26,9 +26,6 @@ #include <net/ip6_route.h> #include "ar-internal.h" -static DEFINE_HASHTABLE(rxrpc_peer_hash, 10); -static DEFINE_SPINLOCK(rxrpc_peer_hash_lock); - /* * Hash a peer key. */ @@ -124,8 +121,9 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( unsigned long hash_key) { struct rxrpc_peer *peer; + struct rxrpc_net *rxnet = local->rxnet; - hash_for_each_possible_rcu(rxrpc_peer_hash, peer, hash_link, hash_key) { + hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) { if (atomic_read(&peer->usage) == 0) return NULL; @@ -230,6 +228,13 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); + + if (RXRPC_TX_SMSS > 2190) + peer->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + peer->cong_cwnd = 3; + else + peer->cong_cwnd = 4; } _leave(" = %p", peer); @@ -301,13 +306,14 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *prealloc) { struct rxrpc_peer *peer; + struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); prealloc->local = local; rxrpc_init_peer(prealloc, hash_key); - spin_lock(&rxrpc_peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); @@ -315,10 +321,10 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, peer = NULL; if (!peer) { peer = prealloc; - hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key); + hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); } - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); return peer; } @@ -329,6 +335,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_peer *peer, *candidate; + struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key = rxrpc_peer_hash_key(local, srx); _enter("{%pISp}", &srx->transport); @@ -350,17 +357,17 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock_bh(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = 
__rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer && !rxrpc_get_peer_maybe(peer)) peer = NULL; if (!peer) - hash_add_rcu(rxrpc_peer_hash, + hash_add_rcu(rxnet->peer_hash, &candidate->hash_link, hash_key); - spin_unlock_bh(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) kfree(candidate); @@ -379,11 +386,13 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, */ void __rxrpc_put_peer(struct rxrpc_peer *peer) { + struct rxrpc_net *rxnet = peer->local->rxnet; + ASSERT(hlist_empty(&peer->error_targets)); - spin_lock_bh(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); - spin_unlock_bh(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); kfree_rcu(peer, rcu); } diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index b9bcfbfb095c..7421656963a9 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -30,19 +30,25 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) { + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + rcu_read_lock(); - read_lock(&rxrpc_call_lock); - return seq_list_start_head(&rxrpc_calls, *_pos); + read_lock(&rxnet->call_lock); + return seq_list_start_head(&rxnet->calls, *_pos); } static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - return seq_list_next(v, &rxrpc_calls, pos); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + return seq_list_next(v, &rxnet->calls, pos); } static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&rxrpc_call_lock); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_unlock(&rxnet->call_lock); rcu_read_unlock(); } @@ -52,10 +58,11 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_sock *rx; struct rxrpc_peer *peer; struct rxrpc_call *call; + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); rxrpc_seq_t tx_hard_ack, rx_hard_ack; char lbuff[50], rbuff[50]; - if (v == &rxrpc_calls) { + if (v == &rxnet->calls) { seq_puts(seq, "Proto Local " " Remote " @@ -113,7 +120,8 @@ static const struct seq_operations rxrpc_call_seq_ops = { static int rxrpc_call_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &rxrpc_call_seq_ops); + return seq_open_net(inode, file, &rxrpc_call_seq_ops, + sizeof(struct seq_net_private)); } const struct file_operations rxrpc_call_seq_fops = { @@ -129,27 +137,34 @@ const struct file_operations rxrpc_call_seq_fops = { */ static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) { - read_lock(&rxrpc_connection_lock); - return seq_list_start_head(&rxrpc_connection_proc_list, *_pos); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_lock(&rxnet->conn_lock); + return seq_list_start_head(&rxnet->conn_proc_list, *_pos); } static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - return seq_list_next(v, &rxrpc_connection_proc_list, pos); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + return seq_list_next(v, &rxnet->conn_proc_list, pos); } static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&rxrpc_connection_lock); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_unlock(&rxnet->conn_lock); } static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); char lbuff[50], rbuff[50]; - if 
(v == &rxrpc_connection_proc_list) { + if (v == &rxnet->conn_proc_list) { seq_puts(seq, "Proto Local " " Remote " @@ -175,7 +190,7 @@ print: " %s %08x %08x %08x\n", lbuff, rbuff, - conn->params.service_id, + conn->service_id, conn->proto.cid, rxrpc_conn_is_service(conn) ? "Svc" : "Clt", atomic_read(&conn->usage), @@ -197,7 +212,8 @@ static const struct seq_operations rxrpc_connection_seq_ops = { static int rxrpc_connection_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &rxrpc_connection_seq_ops); + return seq_open_net(inode, file, &rxrpc_connection_seq_ops, + sizeof(struct seq_net_private)); } const struct file_operations rxrpc_connection_seq_fops = { diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f9caf3b77509..bdece21f313d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -522,8 +522,11 @@ try_again: } if (msg->msg_name) { - size_t len = sizeof(call->conn->params.peer->srx); - memcpy(msg->msg_name, &call->conn->params.peer->srx, len); + struct sockaddr_rxrpc *srx = msg->msg_name; + size_t len = sizeof(call->peer->srx); + + memcpy(msg->msg_name, &call->peer->srx, len); + srx->srx_service = call->service_id; msg->msg_namelen = len; } diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 1bb9b2ccc267..46d1a1f0b55b 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -227,7 +227,9 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, len &= ~(call->conn->size_align - 1); sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, len); + err = skb_to_sgvec(skb, sg, 0, len); + if (unlikely(err < 0)) + goto out; skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_encrypt(req); @@ -324,7 +326,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, bool aborted; u32 data_size, buf; u16 check; - int nsg; + int nsg, ret; _enter(""); @@ -342,7 +344,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, offset, 8); + ret = skb_to_sgvec(skb, sg, offset, 8); + if (unlikely(ret < 0)) + return ret; /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -409,7 +413,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, bool aborted; u32 data_size, buf; u16 check; - int nsg; + int nsg, ret; _enter(",{%d}", skb->len); @@ -434,7 +438,12 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, offset, len); + ret = skb_to_sgvec(skb, sg, offset, len); + if (unlikely(ret < 0)) { + if (sg != _sg) + kfree(sg); + return ret; + } /* decrypt from the session key */ token = call->conn->params.key->payload.data[0]; @@ -640,7 +649,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) whdr.userStatus = 0; whdr.securityIndex = conn->security_ix; whdr._rsvd = 0; - whdr.serviceId = htons(conn->params.service_id); + whdr.serviceId = htons(conn->service_id); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 7d921e56e715..e9f428351293 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -121,7 +121,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) _enter(""); - sprintf(kdesc, "%u:%u", conn->params.service_id, conn->security_ix); + sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix); sec = rxrpc_security_lookup(conn->security_ix); if (!sec) { @@ -133,7 +133,8 @@ int 
rxrpc_init_server_conn_security(struct rxrpc_connection *conn) read_lock(&local->services_lock); rx = rcu_dereference_protected(local->service, lockdep_is_held(&local->services_lock)); - if (rx && rx->srx.srx_service == conn->params.service_id) + if (rx && (rx->srx.srx_service == conn->service_id || + rx->second_service == conn->service_id)) goto found_service; /* the service appears to have died */ diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 96ffa5d5733b..2e636a525a65 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -28,6 +28,15 @@ enum rxrpc_command { RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ }; +struct rxrpc_send_params { + s64 tx_total_len; /* Total Tx data length (if send data) */ + unsigned long user_call_ID; /* User's call ID */ + u32 abort_code; /* Abort code to Tx (if abort) */ + enum rxrpc_command command : 8; /* The command to implement */ + bool exclusive; /* Shared or exclusive call */ + bool upgrade; /* If the connection is upgradeable */ +}; + /* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked @@ -199,6 +208,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, more = msg->msg_flags & MSG_MORE; + if (call->tx_total_len != -1) { + if (len > call->tx_total_len) + return -EMSGSIZE; + if (!more && len != call->tx_total_len) + return -EMSGSIZE; + } + skb = call->tx_pending; call->tx_pending = NULL; rxrpc_see_skb(skb, rxrpc_skb_tx_seen); @@ -291,6 +307,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->remain -= copy; skb->mark += copy; copied += copy; + if (call->tx_total_len != -1) + call->tx_total_len -= copy; } /* check for the far side aborting the call or a network error @@ -362,18 +380,12 @@ efault: /* * extract control messages from the sendmsg() control buffer */ -static int rxrpc_sendmsg_cmsg(struct msghdr *msg, - unsigned long *user_call_ID, - enum rxrpc_command *command, - u32 *abort_code, - bool *_exclusive) +static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) { struct cmsghdr *cmsg; bool got_user_ID = false; int len; - *command = RXRPC_CMD_SEND_DATA; - if (msg->msg_controllen == 0) return -EINVAL; @@ -393,42 +405,55 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, if (msg->msg_flags & MSG_CMSG_COMPAT) { if (len != sizeof(u32)) return -EINVAL; - *user_call_ID = *(u32 *) CMSG_DATA(cmsg); + p->user_call_ID = *(u32 *)CMSG_DATA(cmsg); } else { if (len != sizeof(unsigned long)) return -EINVAL; - *user_call_ID = *(unsigned long *) + p->user_call_ID = *(unsigned long *) CMSG_DATA(cmsg); } - _debug("User Call ID %lx", *user_call_ID); got_user_ID = true; break; case RXRPC_ABORT: - if (*command != RXRPC_CMD_SEND_DATA) + if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; - *command = RXRPC_CMD_SEND_ABORT; - if (len != sizeof(*abort_code)) + p->command = RXRPC_CMD_SEND_ABORT; + if (len != sizeof(p->abort_code)) return -EINVAL; - *abort_code = *(unsigned int *) CMSG_DATA(cmsg); - _debug("Abort %x", *abort_code); - if (*abort_code == 0) + p->abort_code = *(unsigned int *)CMSG_DATA(cmsg); + if (p->abort_code == 0) return -EINVAL; break; case RXRPC_ACCEPT: - if (*command != RXRPC_CMD_SEND_DATA) + if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; - *command = RXRPC_CMD_ACCEPT; + p->command = RXRPC_CMD_ACCEPT; if (len != 0) return -EINVAL; break; case RXRPC_EXCLUSIVE_CALL: - *_exclusive = true; + p->exclusive = true; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_UPGRADE_SERVICE: + p->upgrade = true; if (len != 0) return -EINVAL; break; + 
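
With the control-message parsing consolidated into struct rxrpc_send_params, a client asks for an upgradeable call purely through cmsgs on the first sendmsg(). A hedged userspace sketch (srx, iov and my_call_id are the caller's; error handling elided):

	char control[CMSG_SPACE(sizeof(unsigned long)) + CMSG_SPACE(0)] = {};
	struct msghdr msg = {
		.msg_name	= &srx,		/* struct sockaddr_rxrpc */
		.msg_namelen	= sizeof(srx),
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= control,
		.msg_controllen	= sizeof(control),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_RXRPC;
	cmsg->cmsg_type	 = RXRPC_USER_CALL_ID;
	cmsg->cmsg_len	 = CMSG_LEN(sizeof(unsigned long));
	memcpy(CMSG_DATA(cmsg), &my_call_id, sizeof(my_call_id));

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_RXRPC;
	cmsg->cmsg_type	 = RXRPC_UPGRADE_SERVICE;
	cmsg->cmsg_len	 = CMSG_LEN(0);	/* must carry no payload, per the check above */

	sendmsg(fd, &msg, 0);
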
+ case RXRPC_TX_LENGTH: + if (p->tx_total_len != -1 || len != sizeof(__s64)) + return -EINVAL; + p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg); + if (p->tx_total_len < 0) + return -EINVAL; + break; + default: return -EINVAL; } @@ -436,6 +461,8 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, if (!got_user_ID) return -EINVAL; + if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) + return -EINVAL; _leave(" = 0"); return 0; } @@ -447,7 +474,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, */ static struct rxrpc_call * rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, - unsigned long user_call_ID, bool exclusive) + struct rxrpc_send_params *p) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_conn_parameters cp; @@ -471,9 +498,11 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.local = rx->local; cp.key = rx->key; cp.security_level = rx->min_sec_level; - cp.exclusive = rx->exclusive | exclusive; + cp.exclusive = rx->exclusive | p->exclusive; + cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID, + p->tx_total_len, GFP_KERNEL); /* The socket is now unlocked */ _leave(" = %p\n", call); @@ -489,25 +518,29 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { enum rxrpc_call_state state; - enum rxrpc_command cmd; struct rxrpc_call *call; - unsigned long user_call_ID = 0; - bool exclusive = false; - u32 abort_code = 0; int ret; + struct rxrpc_send_params p = { + .tx_total_len = -1, + .user_call_ID = 0, + .abort_code = 0, + .command = RXRPC_CMD_SEND_DATA, + .exclusive = false, + .upgrade = true, + }; + _enter(""); - ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, - &exclusive); + ret = rxrpc_sendmsg_cmsg(msg, &p); if (ret < 0) goto error_release_sock; - if (cmd == RXRPC_CMD_ACCEPT) { + if (p.command == RXRPC_CMD_ACCEPT) { ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; - call = rxrpc_accept_call(rx, user_call_ID, NULL); + call = rxrpc_accept_call(rx, p.user_call_ID, NULL); /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); @@ -515,13 +548,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return 0; } - call = rxrpc_find_call_by_user_ID(rx, user_call_ID); + call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); if (!call) { ret = -EBADSLT; - if (cmd != RXRPC_CMD_SEND_DATA) + if (p.command != RXRPC_CMD_SEND_DATA) goto error_release_sock; - call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, - exclusive); + call = rxrpc_new_client_call_for_sendmsg(rx, msg, &p); /* The socket is now unlocked... 
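
RXRPC_TX_LENGTH, parsed in the case above, lets the sender declare the call's total Tx size up front so packets can be sized and encrypted without a trailing short segment; rxrpc_send_data (earlier hunk) then returns -EMSGSIZE for any sendmsg that overshoots the declared total or ends the call short of it. Extending the previous sketch by one more cmsg:

	__s64 total = request_len;	/* must be >= 0 */

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_RXRPC;
	cmsg->cmsg_type	 = RXRPC_TX_LENGTH;
	cmsg->cmsg_len	 = CMSG_LEN(sizeof(__s64));
	memcpy(CMSG_DATA(cmsg), &total, sizeof(total));

Per the checks above and in rxrpc_do_sendmsg below, it may only accompany RXRPC_CMD_SEND_DATA and may only be set once, before any data has been queued on the call.
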
*/ if (IS_ERR(call)) return PTR_ERR(call); @@ -545,6 +577,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -ERESTARTSYS; goto error_put; } + + if (p.tx_total_len != -1) { + ret = -EINVAL; + if (call->tx_total_len != -1 || + call->tx_pending || + call->tx_top != 0) + goto error_put; + call->tx_total_len = p.tx_total_len; + } } state = READ_ONCE(call->state); @@ -554,11 +595,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (state >= RXRPC_CALL_COMPLETE) { /* it's too late for this call */ ret = -ESHUTDOWN; - } else if (cmd == RXRPC_CMD_SEND_ABORT) { + } else if (p.command == RXRPC_CMD_SEND_ABORT) { ret = 0; - if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED)) + if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED)) ret = rxrpc_send_abort_packet(call); - } else if (cmd != RXRPC_CMD_SEND_DATA) { + } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && state != RXRPC_CALL_CLIENT_SEND_REQUEST) { @@ -662,5 +703,24 @@ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, mutex_unlock(&call->user_mutex); return aborted; } - EXPORT_SYMBOL(rxrpc_kernel_abort_call); + +/** + * rxrpc_kernel_set_tx_length - Set the total Tx length on a call + * @sock: The socket the call is on + * @call: The call to be informed + * @tx_total_len: The amount of data to be transmitted for this call + * + * Allow a kernel service to set the total transmit length on a call. This + * allows buffer-to-packet encrypt-and-copy to be performed. + * + * This function is primarily for use for setting the reply length since the + * request length can be set when beginning the call. + */ +void rxrpc_kernel_set_tx_length(struct socket *sock, struct rxrpc_call *call, + s64 tx_total_len) +{ + WARN_ON(call->tx_total_len != -1); + call->tx_total_len = tx_total_len; +} +EXPORT_SYMBOL(rxrpc_kernel_set_tx_length); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9fb84f0de6af..e70ed26485a2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -649,6 +649,7 @@ config NET_EMATCH_IPSET config NET_CLS_ACT bool "Actions" + select NET_CLS ---help--- Say Y here if you want to use traffic control actions. 
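
From here the diff moves to the packet scheduler. The act_api.c hunks that follow add "goto chain", an action verdict that names another filter chain in which to continue classification. The chain index rides in the low bits of the extended verdict, as the masks used below imply (sketch):

	u32 verdict = TC_ACT_GOTO_CHAIN | chain_index;	/* chain_index <= TC_ACT_EXT_VAL_MASK */

	if (TC_ACT_EXT_CMP(verdict, TC_ACT_GOTO_CHAIN))
		index = verdict & TC_ACT_EXT_VAL_MASK;

tcf_action_goto_chain_init() resolves the index to a struct tcf_chain once, at action-install time, taking a reference; tcf_action_goto_chain_exec() then only has to point res->goto_tp at the chain's first filter in the fast path.
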
Actions get attached to classifiers and are invoked after a successful diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a90e8f355c00..aed6cf2e9fd8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -28,6 +28,31 @@ #include <net/act_api.h> #include <net/netlink.h> +static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp) +{ + u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK; + + if (!tp) + return -EINVAL; + a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true); + if (!a->goto_chain) + return -ENOMEM; + return 0; +} + +static void tcf_action_goto_chain_fini(struct tc_action *a) +{ + tcf_chain_put(a->goto_chain); +} + +static void tcf_action_goto_chain_exec(const struct tc_action *a, + struct tcf_result *res) +{ + const struct tcf_chain *chain = a->goto_chain; + + res->goto_tp = rcu_dereference_bh(chain->filter_chain); +} + static void free_tcf(struct rcu_head *head) { struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu); @@ -39,6 +64,8 @@ static void free_tcf(struct rcu_head *head) kfree(p->act_cookie->data); kfree(p->act_cookie); } + if (p->goto_chain) + tcf_action_goto_chain_fini(p); kfree(p); } @@ -465,6 +492,8 @@ repeat: else /* faulty graph, stop pipeline */ return TC_ACT_OK; } + } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { + tcf_action_goto_chain_exec(a, res); } if (ret != TC_ACT_PIPE) @@ -570,9 +599,9 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } -struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, - int bind) +struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, + struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind) { struct tc_action *a; struct tc_action_ops *a_o; @@ -657,6 +686,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, if (err != ACT_P_CREATED) module_put(a_o->owner); + if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) { + err = tcf_action_goto_chain_init(a, tp); + if (err) { + LIST_HEAD(actions); + + list_add_tail(&a->list, &actions); + tcf_action_destroy(&actions, bind); + return ERR_PTR(err); + } + } + return a; err_mod: @@ -680,8 +720,9 @@ static void cleanup_a(struct list_head *actions, int ovr) a->tcfa_refcnt--; } -int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, struct list_head *actions) +int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, int bind, + struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; @@ -693,7 +734,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, return err; for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(net, tb[i], est, name, ovr, bind); + act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -1020,7 +1061,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, int ret = 0; LIST_HEAD(actions); - ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions); + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions); if (ret) return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index ab6fdbd34db7..3317a2f579da 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -350,6 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl, 
sctph->checksum = sctp_compute_cksum(skb, skb_network_offset(skb) + ihl); skb->ip_summed = CHECKSUM_NONE; + skb->csum_not_inet = 0; return 1; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 22f88b35a546..39da0c5801c9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -106,13 +106,12 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, - struct tcf_proto __rcu **chain, int event) + struct tcf_chain *chain, int event) { - struct tcf_proto __rcu **it_chain; struct tcf_proto *tp; - for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL; - it_chain = &tp->next) + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) tfilter_notify(net, oskb, n, tp, 0, event, false); } @@ -125,11 +124,12 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) if (tp) first = tp->prio - 1; - return first; + return TC_H_MAJ(first); } static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, - u32 prio, u32 parent, struct Qdisc *q) + u32 prio, u32 parent, struct Qdisc *q, + struct tcf_chain *chain) { struct tcf_proto *tp; int err; @@ -165,6 +165,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->prio = prio; tp->classid = parent; tp->q = q; + tp->chain = chain; err = tp->ops->init(tp); if (err) { @@ -185,16 +186,226 @@ static void tcf_proto_destroy(struct tcf_proto *tp) kfree_rcu(tp, rcu); } -void tcf_destroy_chain(struct tcf_proto __rcu **fl) +static struct tcf_chain *tcf_chain_create(struct tcf_block *block, + u32 chain_index) +{ + struct tcf_chain *chain; + + chain = kzalloc(sizeof(*chain), GFP_KERNEL); + if (!chain) + return NULL; + list_add_tail(&chain->list, &block->chain_list); + chain->block = block; + chain->index = chain_index; + chain->refcnt = 1; + return chain; +} + +static void tcf_chain_flush(struct tcf_chain *chain) { struct tcf_proto *tp; - while ((tp = rtnl_dereference(*fl)) != NULL) { - RCU_INIT_POINTER(*fl, tp->next); + if (*chain->p_filter_chain) + RCU_INIT_POINTER(*chain->p_filter_chain, NULL); + while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { + RCU_INIT_POINTER(chain->filter_chain, tp->next); tcf_proto_destroy(tp); } } -EXPORT_SYMBOL(tcf_destroy_chain); + +static void tcf_chain_destroy(struct tcf_chain *chain) +{ + list_del(&chain->list); + tcf_chain_flush(chain); + kfree(chain); +} + +struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, + bool create) +{ + struct tcf_chain *chain; + + list_for_each_entry(chain, &block->chain_list, list) { + if (chain->index == chain_index) { + chain->refcnt++; + return chain; + } + } + if (create) + return tcf_chain_create(block, chain_index); + else + return NULL; +} +EXPORT_SYMBOL(tcf_chain_get); + +void tcf_chain_put(struct tcf_chain *chain) +{ + /* Destroy unused chain, with exception of chain 0, which is the + * default one and has to be always present. 
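
tcf_chain_get()/tcf_chain_put() above are the reference-counting API shared by the filter-configuration path and by goto-chain actions. Typical usage, mirroring tcf_action_goto_chain_init() earlier in this diff:

	chain = tcf_chain_get(block, chain_index, true);	/* create if absent */
	if (!chain)
		return -ENOMEM;
	/* ... install filters or keep the reference in an action ... */
	tcf_chain_put(chain);	/* frees an empty, unused, non-zero chain */

The special-casing of chain 0 matters: it backs the legacy single filter list through p_filter_chain, so it must always exist for qdiscs and userspace that know nothing about multiple chains.
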
+ */ + if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0) + tcf_chain_destroy(chain); +} +EXPORT_SYMBOL(tcf_chain_put); + +static void +tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, + struct tcf_proto __rcu **p_filter_chain) +{ + chain->p_filter_chain = p_filter_chain; +} + +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain) +{ + struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); + struct tcf_chain *chain; + int err; + + if (!block) + return -ENOMEM; + INIT_LIST_HEAD(&block->chain_list); + /* Create chain 0 by default, it has to be always present. */ + chain = tcf_chain_create(block, 0); + if (!chain) { + err = -ENOMEM; + goto err_chain_create; + } + tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); + *p_block = block; + return 0; + +err_chain_create: + kfree(block); + return err; +} +EXPORT_SYMBOL(tcf_block_get); + +void tcf_block_put(struct tcf_block *block) +{ + struct tcf_chain *chain, *tmp; + + if (!block) + return; + + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_destroy(chain); + kfree(block); +} +EXPORT_SYMBOL(tcf_block_put); + +/* Main classifier routine: scans classifier chain attached + * to this qdisc, (optionally) tests for protocol and asks + * specific classifiers. + */ +int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + __be16 protocol = tc_skb_protocol(skb); +#ifdef CONFIG_NET_CLS_ACT + const int max_reclassify_loop = 4; + const struct tcf_proto *orig_tp = tp; + const struct tcf_proto *first_tp; + int limit = 0; + +reclassify: +#endif + for (; tp; tp = rcu_dereference_bh(tp->next)) { + int err; + + if (tp->protocol != protocol && + tp->protocol != htons(ETH_P_ALL)) + continue; + + err = tp->classify(skb, tp, res); +#ifdef CONFIG_NET_CLS_ACT + if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { + first_tp = orig_tp; + goto reset; + } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { + first_tp = res->goto_tp; + goto reset; + } +#endif + if (err >= 0) + return err; + } + + return TC_ACT_UNSPEC; /* signal: continue lookup */ +#ifdef CONFIG_NET_CLS_ACT +reset: + if (unlikely(limit++ >= max_reclassify_loop)) { + net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", + tp->q->ops->id, tp->prio & 0xffff, + ntohs(tp->protocol)); + return TC_ACT_SHOT; + } + + tp = first_tp; + protocol = tc_skb_protocol(skb); + goto reclassify; +#endif +} +EXPORT_SYMBOL(tcf_classify); + +struct tcf_chain_info { + struct tcf_proto __rcu **pprev; + struct tcf_proto __rcu *next; +}; + +static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info) +{ + return rtnl_dereference(*chain_info->pprev); +} + +static void tcf_chain_tp_insert(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + struct tcf_proto *tp) +{ + if (chain->p_filter_chain && + *chain_info->pprev == chain->filter_chain) + rcu_assign_pointer(*chain->p_filter_chain, tp); + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); + rcu_assign_pointer(*chain_info->pprev, tp); +} + +static void tcf_chain_tp_remove(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + struct tcf_proto *tp) +{ + struct tcf_proto *next = rtnl_dereference(chain_info->next); + + if (chain->p_filter_chain && tp == chain->filter_chain) + RCU_INIT_POINTER(*chain->p_filter_chain, next); + RCU_INIT_POINTER(*chain_info->pprev, next); +} + +static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, + struct 
tcf_chain_info *chain_info, + u32 protocol, u32 prio, + bool prio_allocate) +{ + struct tcf_proto **pprev; + struct tcf_proto *tp; + + /* Check the chain for existence of proto-tcf with this priority */ + for (pprev = &chain->filter_chain; + (tp = rtnl_dereference(*pprev)); pprev = &tp->next) { + if (tp->prio >= prio) { + if (tp->prio == prio) { + if (prio_allocate || + (tp->protocol != protocol && protocol)) + return ERR_PTR(-EINVAL); + } else { + tp = NULL; + } + break; + } + } + chain_info->pprev = pprev; + chain_info->next = tp ? tp->next : NULL; + return tp; +} /* Add/change/delete/get a filter node */ @@ -206,13 +417,14 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 protocol; u32 prio; - u32 nprio; + bool prio_allocate; u32 parent; + u32 chain_index; struct net_device *dev; struct Qdisc *q; - struct tcf_proto __rcu **back; - struct tcf_proto __rcu **chain; - struct tcf_proto *next; + struct tcf_chain_info chain_info; + struct tcf_chain *chain = NULL; + struct tcf_block *block; struct tcf_proto *tp; const struct Qdisc_class_ops *cops; unsigned long cl; @@ -234,7 +446,7 @@ replay: t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); - nprio = prio; + prio_allocate = false; parent = t->tcm_parent; cl = 0; @@ -250,6 +462,7 @@ replay: */ if (n->nlmsg_flags & NLM_F_CREATE) { prio = TC_H_MAKE(0x80000000U, 0U); + prio_allocate = true; break; } /* fall-through */ @@ -280,7 +493,7 @@ replay: if (!cops) return -EINVAL; - if (cops->tcf_chain == NULL) + if (!cops->tcf_block) return -EOPNOTSUPP; /* Do we search for filter, attached to class? */ @@ -291,34 +504,36 @@ replay: } /* And the last stroke */ - chain = cops->tcf_chain(q, cl); - if (chain == NULL) { + block = cops->tcf_block(q, cl); + if (!block) { err = -EINVAL; goto errout; } + + chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; + if (chain_index > TC_ACT_EXT_VAL_MASK) { + err = -EINVAL; + goto errout; + } + chain = tcf_chain_get(block, chain_index, + n->nlmsg_type == RTM_NEWTFILTER); + if (!chain) { + err = n->nlmsg_type == RTM_NEWTFILTER ? 
-ENOMEM : -EINVAL; + goto errout; + } + if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); - tcf_destroy_chain(chain); + tcf_chain_flush(chain); err = 0; goto errout; } - /* Check the chain for existence of proto-tcf with this priority */ - for (back = chain; - (tp = rtnl_dereference(*back)) != NULL; - back = &tp->next) { - if (tp->prio >= prio) { - if (tp->prio == prio) { - if (!nprio || - (tp->protocol != protocol && protocol)) { - err = -EINVAL; - goto errout; - } - } else { - tp = NULL; - } - break; - } + tp = tcf_chain_tp_find(chain, &chain_info, protocol, + prio, prio_allocate); + if (IS_ERR(tp)) { + err = PTR_ERR(tp); + goto errout; } if (tp == NULL) { @@ -335,11 +550,11 @@ replay: goto errout; } - if (!nprio) - nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back))); + if (prio_allocate) + prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); tp = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, nprio, parent, q); + protocol, prio, parent, q, chain); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout; @@ -354,8 +569,7 @@ replay: if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - next = rtnl_dereference(tp->next); - RCU_INIT_POINTER(*back, next); + tcf_chain_tp_remove(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER, false); tcf_proto_destroy(tp); @@ -384,11 +598,10 @@ replay: err = tp->ops->delete(tp, fh, &last); if (err) goto errout; - next = rtnl_dereference(tp->next); tfilter_notify(net, skb, n, tp, t->tcm_handle, RTM_DELTFILTER, false); if (last) { - RCU_INIT_POINTER(*back, next); + tcf_chain_tp_remove(chain, &chain_info, tp); tcf_proto_destroy(tp); } goto errout; @@ -405,10 +618,8 @@ replay: err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? 
TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); if (err == 0) { - if (tp_created) { - RCU_INIT_POINTER(tp->next, rtnl_dereference(*back)); - rcu_assign_pointer(*back, tp); - } + if (tp_created) + tcf_chain_tp_insert(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); } else { if (tp_created) @@ -416,6 +627,8 @@ replay: } errout: + if (chain) + tcf_chain_put(chain); if (cl) cops->put(q, cl); if (err == -EAGAIN) @@ -444,6 +657,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) goto nla_put_failure; + if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) + goto nla_put_failure; tcm->tcm_handle = fh; if (RTM_DELTFILTER != event) { tcm->tcm_handle = 0; @@ -500,22 +715,76 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, RTM_NEWTFILTER); } +static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, + struct netlink_callback *cb, + long index_start, long *p_index) +{ + struct net *net = sock_net(skb->sk); + struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct tcf_dump_args arg; + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next), (*p_index)++) { + if (*p_index < index_start) + continue; + if (TC_H_MAJ(tcm->tcm_info) && + TC_H_MAJ(tcm->tcm_info) != tp->prio) + continue; + if (TC_H_MIN(tcm->tcm_info) && + TC_H_MIN(tcm->tcm_info) != tp->protocol) + continue; + if (*p_index > index_start) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (cb->args[1] == 0) { + if (tcf_fill_node(net, skb, tp, 0, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTFILTER) <= 0) + return false; + + cb->args[1] = 1; + } + if (!tp->ops->walk) + continue; + arg.w.fn = tcf_node_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1] - 1; + arg.w.count = 0; + tp->ops->walk(tp, &arg.w); + cb->args[1] = arg.w.count + 1; + if (arg.w.stop) + return false; + } + return true; +} + /* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int t; - int s_t; + struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct Qdisc *q; - struct tcf_proto *tp, __rcu **chain; + struct tcf_block *block; + struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); unsigned long cl = 0; const struct Qdisc_class_ops *cops; - struct tcf_dump_args arg; + long index_start; + long index; + int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; + + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + if (err) + return err; + dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return skb->len; @@ -529,56 +798,29 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) cops = q->ops->cl_ops; if (!cops) goto errout; - if (cops->tcf_chain == NULL) + if (!cops->tcf_block) goto errout; if (TC_H_MIN(tcm->tcm_parent)) { cl = cops->get(q, tcm->tcm_parent); if (cl == 0) goto errout; } - chain = cops->tcf_chain(q, cl); - if (chain == NULL) + block = cops->tcf_block(q, cl); + if (!block) goto errout; - s_t = cb->args[0]; - - for (tp = rtnl_dereference(*chain), t = 0; - tp; tp = rtnl_dereference(tp->next), t++) { - if (t < s_t) - continue; - if (TC_H_MAJ(tcm->tcm_info) && - TC_H_MAJ(tcm->tcm_info) != tp->prio) - continue; - if (TC_H_MIN(tcm->tcm_info) && - TC_H_MIN(tcm->tcm_info) != tp->protocol) - continue; - if (t > 
s_t) - memset(&cb->args[1], 0, - sizeof(cb->args)-sizeof(cb->args[0])); - if (cb->args[1] == 0) { - if (tcf_fill_node(net, skb, tp, 0, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER) <= 0) - break; + index_start = cb->args[0]; + index = 0; - cb->args[1] = 1; - } - if (tp->ops->walk == NULL) + list_for_each_entry(chain, &block->chain_list, list) { + if (tca[TCA_CHAIN] && + nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; - arg.w.fn = tcf_node_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1] - 1; - arg.w.count = 0; - tp->ops->walk(tp, &arg.w); - cb->args[1] = arg.w.count + 1; - if (arg.w.stop) + if (!tcf_chain_dump(chain, skb, cb, index_start, &index)) break; } - cb->args[0] = t; + cb->args[0] = index; errout: if (cl) @@ -608,8 +850,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct tc_action *act; if (exts->police && tb[exts->police]) { - act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", ovr, TCA_ACT_BIND); + act = tcf_action_init_1(net, tp, tb[exts->police], + rate_tlv, "police", ovr, + TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -620,8 +863,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, LIST_HEAD(actions); int err, i = 0; - err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, ovr, TCA_ACT_BIND, + err = tcf_action_init(net, tp, tb[exts->action], + rate_tlv, NULL, ovr, TCA_ACT_BIND, &actions); if (err) return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5ebeae996e63..be0cfdf48976 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -70,6 +70,7 @@ static int cls_bpf_exec_opcode(int code) case TC_ACT_OK: case TC_ACT_SHOT: case TC_ACT_STOLEN: + case TC_ACT_TRAP: case TC_ACT_REDIRECT: case TC_ACT_UNSPEC: return code; @@ -161,6 +162,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.gen_flags = prog->gen_flags; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->chain->index, tp->protocol, &offload); if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index ca526c0881bd..7832eb93379b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -49,6 +49,8 @@ struct fl_flow_key { }; struct flow_dissector_key_ports enc_tp; struct flow_dissector_key_mpls mpls; + struct flow_dissector_key_tcp tcp; + struct flow_dissector_key_ip ip; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
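
As the cls_bpf hunk above shows, and the cls_flower, cls_matchall and cls_u32 hunks below repeat, ndo_setup_tc() now carries the chain index alongside the handle and protocol. A driver that only offloads the default chain is expected to refuse the rest; a hedged driver-side sketch (foo_setup_tc is a placeholder; the real drivers were converted elsewhere in this series):

	static int foo_setup_tc(struct net_device *dev, u32 handle,
				u32 chain_index, __be16 proto,
				struct tc_to_netdev *tc)
	{
		if (chain_index)
			return -EOPNOTSUPP;	/* offload chain 0 only */
		/* ... existing TC_SETUP_* dispatch ... */
		return 0;
	}
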
*/ struct fl_flow_mask_range { @@ -237,7 +239,8 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) tc->type = TC_SETUP_CLSFLOWER; tc->cls_flower = &offload; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, + tp->protocol, tc); } static int fl_hw_replace_filter(struct tcf_proto *tp, @@ -273,8 +276,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc->type = TC_SETUP_CLSFLOWER; tc->cls_flower = &offload; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - tc); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->chain->index, tp->protocol, tc); if (!err) f->flags |= TCA_CLS_FLAGS_IN_HW; @@ -300,7 +303,8 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) tc->type = TC_SETUP_CLSFLOWER; tc->cls_flower = &offload; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->chain->index, tp->protocol, tc); } static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) @@ -424,6 +428,12 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_TCP_FLAGS] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_IP_TOS] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IP_TTL] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NLA_U8 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -525,6 +535,19 @@ static int fl_set_key_flags(struct nlattr **tb, return 0; } +static void fl_set_key_ip(struct nlattr **tb, + struct flow_dissector_key_ip *key, + struct flow_dissector_key_ip *mask) +{ + fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS, + &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK, + sizeof(key->tos)); + + fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, + &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK, + sizeof(key->ttl)); +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask) { @@ -567,6 +590,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.ip_proto)); + fl_set_key_ip(tb, &key->ip, &mask->ip); } if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) { @@ -596,6 +620,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)); + fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS, + &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK, + sizeof(key->tcp.flags)); } else if (key->basic.ip_proto == IPPROTO_UDP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, @@ -767,6 +794,10 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_IP, ip); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_TCP, tcp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ICMP, icmp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, 
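
With the TCA_FLOWER_KEY_TCP_FLAGS and IP TOS/TTL attributes above, flower now also populates FLOW_DISSECTOR_KEY_TCP and FLOW_DISSECTOR_KEY_IP in its dissector, so offloading drivers can see those fields. A hypothetical driver-side snippet (names and error handling are illustrative; f stands for the tc_cls_flower_offload request):

/* Hypothetical driver code: fetch the new IP key from a
 * TC_SETUP_CLSFLOWER request; FLOW_DISSECTOR_KEY_TCP is read the
 * same way for tcp.flags.
 */
if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
	struct flow_dissector_key_ip *key =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_IP, f->key);
	struct flow_dissector_key_ip *mask =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_IP, f->mask);

	/* program key->tos and key->ttl under mask->tos / mask->ttl */
}
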
FLOW_DISSECTOR_KEY_ARP, arp); @@ -1074,6 +1105,19 @@ static int fl_dump_key_mpls(struct sk_buff *skb, return 0; } +static int fl_dump_key_ip(struct sk_buff *skb, + struct flow_dissector_key_ip *key, + struct flow_dissector_key_ip *mask) +{ + if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos, + TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) || + fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl, + TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl))) + return -1; + + return 0; +} + static int fl_dump_key_vlan(struct sk_buff *skb, struct flow_dissector_key_vlan *vlan_key, struct flow_dissector_key_vlan *vlan_mask) @@ -1187,9 +1231,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if ((key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) && - fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, - sizeof(key->basic.ip_proto))) + sizeof(key->basic.ip_proto)) || + fl_dump_key_ip(skb, &key->ip, &mask->ip))) goto nla_put_failure; if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && @@ -1215,7 +1260,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, - sizeof(key->tp.dst)))) + sizeof(key->tp.dst)) || + fl_dump_key_val(skb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS, + &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK, + sizeof(key->tcp.flags)))) goto nla_put_failure; else if (key->basic.ip_proto == IPPROTO_UDP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 51859b8edd7e..9dc26c32cf32 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -64,8 +64,9 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, offload.cls_mall->exts = &head->exts; offload.cls_mall->cookie = cookie; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->chain->index, + tp->protocol, &offload); if (!err) head->flags |= TCA_CLS_FLAGS_IN_HW; @@ -86,8 +87,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, offload.cls_mall->exts = NULL; offload.cls_mall->cookie = cookie; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - &offload); + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, + tp->protocol, &offload); } static void mall_destroy(struct tcf_proto *tp) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d20e72a095d5..2d01195153e6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -441,7 +441,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; offload.cls_u32->knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + tp->chain->index, tp->protocol, + &offload); } } @@ -465,7 +466,8 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, offload.cls_u32->hnode.prio = h->prio; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + tp->chain->index, tp->protocol, + &offload); if (tc_skip_sw(flags)) return err; @@ -488,7 +490,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) 
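
Every classifier offload call in the hunks above gains a chain_index argument, filled from tp->chain->index. A driver that only supports the default chain would be expected to refuse the rest; a hypothetical callback with the new signature:

/* Hypothetical driver callback. Refusing chain_index != 0 keeps the
 * pre-chains offload behaviour, since chain 0 is the default chain.
 */
static int foo_ndo_setup_tc(struct net_device *dev, u32 handle,
			    u32 chain_index, __be16 protocol,
			    struct tc_to_netdev *tc)
{
	if (chain_index)
		return -EOPNOTSUPP;

	switch (tc->type) {
	case TC_SETUP_CLSU32:
		/* ... program tc->cls_u32 ... */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

Note that the qdisc-level mqprio call further below passes 0 for both chain_index and protocol, so a check like this stays transparent there.
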
offload.cls_u32->hnode.prio = h->prio; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + tp->chain->index, tp->protocol, + &offload); } } @@ -522,7 +525,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, offload.cls_u32->knode.link_handle = n->ht_down->handle; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + tp->chain->index, tp->protocol, + &offload); if (!err) n->flags |= TCA_CLS_FLAGS_IN_HW; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e88342fde1bc..5d95401bbc02 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -163,7 +163,7 @@ int register_qdisc(struct Qdisc_ops *qops) if (!(cops->get && cops->put && cops->walk && cops->leaf)) goto out_einval; - if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf)) + if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) goto out_einval; } @@ -1878,54 +1878,6 @@ done: return skb->len; } -/* Main classifier routine: scans classifier chain attached - * to this qdisc, (optionally) tests for protocol and asks - * specific classifiers. - */ -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) -{ - __be16 protocol = tc_skb_protocol(skb); -#ifdef CONFIG_NET_CLS_ACT - const int max_reclassify_loop = 4; - const struct tcf_proto *old_tp = tp; - int limit = 0; - -reclassify: -#endif - for (; tp; tp = rcu_dereference_bh(tp->next)) { - int err; - - if (tp->protocol != protocol && - tp->protocol != htons(ETH_P_ALL)) - continue; - - err = tp->classify(skb, tp, res); -#ifdef CONFIG_NET_CLS_ACT - if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) - goto reset; -#endif - if (err >= 0) - return err; - } - - return TC_ACT_UNSPEC; /* signal: continue lookup */ -#ifdef CONFIG_NET_CLS_ACT -reset: - if (unlikely(limit++ >= max_reclassify_loop)) { - net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", - tp->q->ops->id, tp->prio & 0xffff, - ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - - tp = old_tp; - protocol = tc_skb_protocol(skb); - goto reclassify; -#endif -} -EXPORT_SYMBOL(tc_classify); - #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 40cbceed4de8..de162592eee0 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -43,6 +43,7 @@ struct atm_flow_data { struct Qdisc *q; /* FIFO, TBF, etc. 
*/ struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); /* chaining */ @@ -143,7 +144,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) list_del_init(&flow->list); pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); - tcf_destroy_chain(&flow->filter_list); + tcf_block_put(flow->block); if (flow->sock) { pr_debug("atm_tc_put: f_count %ld\n", file_count(flow->sock->file)); @@ -274,7 +275,13 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -ENOBUFS; goto err_out; } - RCU_INIT_POINTER(flow->filter_list, NULL); + + error = tcf_block_get(&flow->block, &flow->filter_list); + if (error) { + kfree(flow); + goto err_out; + } + flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; @@ -346,14 +353,13 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)cl; pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - return flow ? &flow->filter_list : &p->link.filter_list; + return flow ? flow->block : p->link.block; } /* --------------------------- Qdisc operations ---------------------------- */ @@ -377,7 +383,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, list_for_each_entry(flow, &p->flows, list) { fl = rcu_dereference_bh(flow->filter_list); if (fl) { - result = tc_classify(skb, fl, &res, true); + result = tcf_classify(skb, fl, &res, true); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; @@ -400,6 +406,7 @@ done: switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: __qdisc_drop(skb, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: @@ -524,6 +531,7 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch) static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) { struct atm_qdisc_data *p = qdisc_priv(sch); + int err; pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); INIT_LIST_HEAD(&p->flows); @@ -534,7 +542,11 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - RCU_INIT_POINTER(p->link.filter_list, NULL); + + err = tcf_block_get(&p->link.block, &p->link.filter_list); + if (err) + return err; + p->link.vcc = NULL; p->link.sock = NULL; p->link.classid = sch->handle; @@ -561,7 +573,7 @@ static void atm_tc_destroy(struct Qdisc *sch) pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); list_for_each_entry(flow, &p->flows, list) - tcf_destroy_chain(&flow->filter_list); + tcf_block_put(flow->block); list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) @@ -646,7 +658,7 @@ static const struct Qdisc_class_ops atm_class_ops = { .change = atm_tc_change, .delete = atm_tc_delete, .walk = atm_tc_walk, - .tcf_chain = atm_tc_find_tcf, + .tcf_block = atm_tc_tcf_block, .bind_tcf = atm_tc_bind_filter, .unbind_tcf = atm_tc_put, .dump = atm_tc_dump_class, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 7415859fd4c3..481036f6b54e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -127,6 +127,7 @@ struct 
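
sch_atm above establishes the conversion pattern that the remaining qdiscs in this patch repeat: keep the tcf_proto list head, wrap it in a block with tcf_block_get() at init (or class-create) time, expose the block through the new .tcf_block class op instead of .tcf_chain, and release it with tcf_block_put() on teardown. Condensed into a minimal, hypothetical qdisc:

/* Condensed form of the per-qdisc conversion (hypothetical qdisc,
 * error unwinding trimmed). */
struct foo_sched_data {
	struct tcf_proto __rcu *filter_list;
	struct tcf_block *block;
};

static int foo_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	return tcf_block_get(&q->block, &q->filter_list);
}

static void foo_destroy(struct Qdisc *sch)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	tcf_block_put(q->block);
}

static struct tcf_block *foo_tcf_block(struct Qdisc *sch, unsigned long cl)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	return cl ? NULL : q->block;	/* no per-class filters here */
}
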
cbq_class { struct tc_cbq_xstats xstats; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; int refcnt; int filters; @@ -233,7 +234,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. */ - result = tc_classify(skb, fl, &res, true); + result = tcf_classify(skb, fl, &res, true); if (!fl || result < 0) goto fallback; @@ -253,6 +254,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -1405,7 +1407,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) WARN_ON(cl->filters); - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->rate_est); @@ -1430,7 +1432,7 @@ static void cbq_destroy(struct Qdisc *sch) */ for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); } for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], @@ -1585,12 +1587,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; + err = tcf_block_get(&cl->block, &cl->filter_list); + if (err) { + kfree(cl); + return err; + } + if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { + tcf_block_put(cl->block); kfree(cl); goto failure; } @@ -1688,8 +1697,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) return 0; } -static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch, - unsigned long arg) +static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; @@ -1697,7 +1705,7 @@ static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch, if (cl == NULL) cl = &q->link; - return &cl->filter_list; + return cl->block; } static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, @@ -1756,7 +1764,7 @@ static const struct Qdisc_class_ops cbq_class_ops = { .change = cbq_change_class, .delete = cbq_delete, .walk = cbq_walk, - .tcf_chain = cbq_find_tcf, + .tcf_block = cbq_tcf_block, .bind_tcf = cbq_bind_filter, .unbind_tcf = cbq_unbind_filter, .dump = cbq_dump_class, diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 58a8c32eab23..a413dc1c2098 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -36,6 +36,7 @@ struct drr_class { struct drr_sched { struct list_head active; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct Qdisc_class_hash clhash; }; @@ -190,15 +191,14 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg) drr_destroy_class(sch, cl); } -static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl) { struct drr_sched *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent, @@ -333,12 +333,13 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, 
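
The classify-side changes are equally mechanical: tc_classify(), deleted from sch_api.c above, lives on as tcf_classify(), and every CONFIG_NET_CLS_ACT result switch gains TC_ACT_TRAP alongside QUEUED and STOLEN, so trapped packets are accounted as stolen rather than dropped. The shared shape, as it now appears in the converted qdiscs:

/* Common shape of the converted classify helpers. QUEUED, STOLEN
 * and the new TRAP set *qerr and then fall through to the SHOT
 * case, so all four results bail out of enqueue. */
result = tcf_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
	/* fall through */
case TC_ACT_SHOT:
	return NULL;
}
#endif
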
&res, false); + result = tcf_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -431,6 +432,9 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) struct drr_sched *q = qdisc_priv(sch); int err; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; @@ -462,7 +466,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch) struct hlist_node *next; unsigned int i; - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -477,7 +481,7 @@ static const struct Qdisc_class_ops drr_class_ops = { .delete = drr_delete_class, .get = drr_get_class, .put = drr_put_class, - .tcf_chain = drr_tcf_chain, + .tcf_block = drr_tcf_block, .bind_tcf = drr_bind_tcf, .unbind_tcf = drr_unbind_tcf, .graft = drr_graft_class, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1c0f877f673a..6d94fcc3592a 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -44,6 +44,7 @@ struct mask_value { struct dsmark_qdisc_data { struct Qdisc *q; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct mask_value *mv; u16 indices; u8 set_tc_index; @@ -183,11 +184,11 @@ ignore: } } -static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - return &p->filter_list; + + return p->block; } /* --------------------------- Qdisc operations ---------------------------- */ @@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, else { struct tcf_result res; struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tc_classify(skb, fl, &res, false); + int result = tcf_classify(skb, fl, &res, false); pr_debug("result %d class 0x%04x\n", result, res.classid); @@ -242,6 +243,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, #ifdef CONFIG_NET_CLS_ACT case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: __qdisc_drop(skb, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; @@ -342,6 +344,10 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) goto errout; + err = tcf_block_get(&p->block, &p->filter_list); + if (err) + return err; + err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); if (err < 0) goto errout; @@ -400,7 +406,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - tcf_destroy_chain(&p->filter_list); + tcf_block_put(p->block); qdisc_destroy(p->q); if (p->mv != p->embedded) kfree(p->mv); @@ -468,7 +474,7 @@ static const struct Qdisc_class_ops dsmark_class_ops = { .change = dsmark_change, .delete = dsmark_delete, .walk = dsmark_walk, - .tcf_chain = dsmark_find_tcf, + .tcf_block = dsmark_tcf_block, .bind_tcf = dsmark_bind_filter, .unbind_tcf = dsmark_put, .dump = dsmark_dump_class, diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index b488721a0059..147fde73a0f5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -390,9 +390,17 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->stat_tcp_retrans++; qdisc_qstats_backlog_inc(sch, skb); if 
(fq_flow_is_detached(f)) { + struct sock *sk = skb->sk; + fq_flow_add_tail(&q->new_flows, f); if (time_after(jiffies, f->age + q->flow_refill_delay)) f->credit = max_t(u32, f->credit, q->quantum); + if (sk && q->rate_enable) { + if (unlikely(smp_load_acquire(&sk->sk_pacing_status) != + SK_PACING_FQ)) + smp_store_release(&sk->sk_pacing_status, + SK_PACING_FQ); + } q->inactive_flows--; } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 9201abce928c..337f2d6d81e4 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -55,6 +55,7 @@ struct fq_codel_flow { struct fq_codel_sched_data { struct tcf_proto __rcu *filter_list; /* optional external classifier */ + struct tcf_block *block; struct fq_codel_flow *flows; /* Flows table [flows_cnt] */ u32 *backlogs; /* backlog table [flows_cnt] */ u32 flows_cnt; /* number of flows */ @@ -96,12 +97,13 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, return fq_codel_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, filter, &res, false); + result = tcf_classify(skb, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return 0; @@ -450,7 +452,7 @@ static void fq_codel_destroy(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); kvfree(q->backlogs); kvfree(q->flows); } @@ -459,6 +461,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) { struct fq_codel_sched_data *q = qdisc_priv(sch); int i; + int err; sch->limit = 10*1024; q->flows_cnt = 1024; @@ -478,6 +481,10 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) return err; } + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + if (!q->flows) { q->flows = kvzalloc(q->flows_cnt * sizeof(struct fq_codel_flow), GFP_KERNEL); @@ -589,14 +596,13 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl) { } -static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl) { struct fq_codel_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl, @@ -679,7 +685,7 @@ static const struct Qdisc_class_ops fq_codel_class_ops = { .leaf = fq_codel_leaf, .get = fq_codel_get, .put = fq_codel_put, - .tcf_chain = fq_codel_find_tcf, + .tcf_block = fq_codel_tcf_block, .bind_tcf = fq_codel_bind, .unbind_tcf = fq_codel_put, .dump = fq_codel_dump_class, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5cb82f6c1b06..b52f74610dc7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -116,6 +116,7 @@ struct hfsc_class { struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct tcf_proto __rcu *filter_list; /* filter list */ + struct tcf_block *block; unsigned int filter_cnt; /* filter count */ unsigned int level; /* class level in hierarchy */ @@ -1040,12 +1041,19 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + err = tcf_block_get(&cl->block, &cl->filter_list); + if (err) { + kfree(cl); + return err; + } + if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, 
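
The sch_fq hunk above also starts advertising pacing to the socket layer: the first time a detached flow that has a socket re-enters fq with rate_enable set, sk->sk_pacing_status is flipped to SK_PACING_FQ with a store-release, paired with the load-acquire on the test. The reading side of that handshake would look roughly like this (hypothetical helper; only the names from the hunk are assumed):

/* Hypothetical acquire-side check: lets other code learn that fq
 * is already pacing this socket. */
static bool foo_paced_by_fq(const struct sock *sk)
{
	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_FQ;
}
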
qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { + tcf_block_put(cl->block); kfree(cl); return err; } @@ -1091,7 +1099,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); qdisc_destroy(cl->qdisc); gen_kill_estimator(&cl->rate_est); if (cl != &q->root) @@ -1142,11 +1150,12 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; head = &q->root; tcf = rcu_dereference_bh(q->root.filter_list); - while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) { + while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -1261,8 +1270,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) cl->filter_cnt--; } -static struct tcf_proto __rcu ** -hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg) +static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; @@ -1270,7 +1278,7 @@ hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg) if (cl == NULL) cl = &q->root; - return &cl->filter_list; + return cl->block; } static int @@ -1515,7 +1523,7 @@ hfsc_destroy_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -1662,7 +1670,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = { .put = hfsc_put_class, .bind_tcf = hfsc_bind_tcf, .unbind_tcf = hfsc_unbind_tcf, - .tcf_chain = hfsc_tcf_chain, + .tcf_block = hfsc_tcf_block, .dump = hfsc_dump_class, .dump_stats = hfsc_dump_class_stats, .walk = hfsc_walk diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 570ef3b0c09b..203286ab4427 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -105,6 +105,7 @@ struct htb_class { int quantum; /* but stored for parent-to-leaf return */ struct tcf_proto __rcu *filter_list; /* class attached filters */ + struct tcf_block *block; int filter_cnt; int refcnt; /* usage count of this class */ @@ -156,6 +157,7 @@ struct htb_sched { /* filters for qdisc itself */ struct tcf_proto __rcu *filter_list; + struct tcf_block *block; #define HTB_WARN_TOOMANYEVENTS 0x1 unsigned int warned; /* only one warning */ @@ -231,11 +233,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) { + while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -1017,6 +1020,10 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); if (err < 0) return err; @@ -1230,7 +1237,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_destroy(cl->un.leaf.q); } 
gen_kill_estimator(&cl->rate_est); - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); kfree(cl); } @@ -1248,11 +1255,11 @@ static void htb_destroy(struct Qdisc *sch) * because filter need its target class alive to be able to call * unbind_filter on it (without Oops). */ - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -1396,6 +1403,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; + err = tcf_block_get(&cl->block, &cl->filter_list); + if (err) { + kfree(cl); + goto failure; + } if (htb_rate_est || tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, @@ -1403,6 +1415,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_root_sleeping_running(sch), tca[TCA_RATE] ? : &est.nla); if (err) { + tcf_block_put(cl->block); kfree(cl); goto failure; } @@ -1521,14 +1534,12 @@ failure: return err; } -static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch, - unsigned long arg) +static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list; - return fl; + return cl ? cl->block : q->block; } static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, @@ -1591,7 +1602,7 @@ static const struct Qdisc_class_ops htb_class_ops = { .change = htb_change_class, .delete = htb_delete, .walk = htb_walk, - .tcf_chain = htb_find_tcf, + .tcf_block = htb_tcf_block, .bind_tcf = htb_bind_filter, .unbind_tcf = htb_unbind_filter, .dump = htb_dump_class, diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 3bab5f66c392..d8a9bebcab90 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -18,6 +18,10 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> +struct ingress_sched_data { + struct tcf_block *block; +}; + static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; @@ -47,16 +51,23 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { } -static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) { - struct net_device *dev = qdisc_dev(sch); + struct ingress_sched_data *q = qdisc_priv(sch); - return &dev->ingress_cl_list; + return q->block; } static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { + struct ingress_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int err; + + err = tcf_block_get(&q->block, &dev->ingress_cl_list); + if (err) + return err; + net_inc_ingress_queue(); sch->flags |= TCQ_F_CPUSTATS; @@ -65,9 +76,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) static void ingress_destroy(struct Qdisc *sch) { - struct net_device *dev = qdisc_dev(sch); + struct ingress_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&dev->ingress_cl_list); + tcf_block_put(q->block); net_dec_ingress_queue(); } @@ -91,7 +102,7 @@ static const struct Qdisc_class_ops ingress_class_ops = { .get = ingress_get, .put = ingress_put, .walk = ingress_walk, - .tcf_chain = ingress_find_tcf, + .tcf_block = 
ingress_tcf_block, .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, .unbind_tcf = ingress_put, @@ -100,12 +111,18 @@ static const struct Qdisc_class_ops ingress_class_ops = { static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", + .priv_size = sizeof(struct ingress_sched_data), .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, .owner = THIS_MODULE, }; +struct clsact_sched_data { + struct tcf_block *ingress_block; + struct tcf_block *egress_block; +}; + static unsigned long clsact_get(struct Qdisc *sch, u32 classid) { switch (TC_H_MIN(classid)) { @@ -128,16 +145,15 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch, return clsact_get(sch, classid); } -static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl) { - struct net_device *dev = qdisc_dev(sch); + struct clsact_sched_data *q = qdisc_priv(sch); switch (cl) { case TC_H_MIN(TC_H_MIN_INGRESS): - return &dev->ingress_cl_list; + return q->ingress_block; case TC_H_MIN(TC_H_MIN_EGRESS): - return &dev->egress_cl_list; + return q->egress_block; default: return NULL; } @@ -145,6 +161,18 @@ static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch, static int clsact_init(struct Qdisc *sch, struct nlattr *opt) { + struct clsact_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int err; + + err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list); + if (err) + return err; + + err = tcf_block_get(&q->egress_block, &dev->egress_cl_list); + if (err) + return err; + net_inc_ingress_queue(); net_inc_egress_queue(); @@ -155,10 +183,10 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) static void clsact_destroy(struct Qdisc *sch) { - struct net_device *dev = qdisc_dev(sch); + struct clsact_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&dev->ingress_cl_list); - tcf_destroy_chain(&dev->egress_cl_list); + tcf_block_put(q->egress_block); + tcf_block_put(q->ingress_block); net_dec_ingress_queue(); net_dec_egress_queue(); @@ -169,7 +197,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { .get = clsact_get, .put = ingress_put, .walk = ingress_walk, - .tcf_chain = clsact_find_tcf, + .tcf_block = clsact_tcf_block, .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, .unbind_tcf = ingress_put, @@ -178,6 +206,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { .cl_ops = &clsact_class_ops, .id = "clsact", + .priv_size = sizeof(struct clsact_sched_data), .init = clsact_init, .destroy = clsact_destroy, .dump = ingress_dump, diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 0a4cf27ea54b..e0c02725cd48 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -43,7 +43,7 @@ static void mqprio_destroy(struct Qdisc *sch) struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, { .mqprio = &offload } }; - dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc); } else { netdev_set_num_tc(dev, 0); } @@ -152,7 +152,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, { .mqprio = &offload } }; - err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, + 0, 0, &tc); if (err) return err; diff --git 
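
ingress and clsact previously kept no private state at all, their filter lists live on the net_device; both now declare a priv_size and store the tcf_block(s) there while still anchoring the blocks on dev->ingress_cl_list and dev->egress_cl_list. That keeps the fast path untouched, which still dereferences the device pointer directly, roughly as follows (paraphrasing the ingress hook in net/core/dev.c, not verbatim from this diff):

/* Fast-path consumer is unchanged: the block is control-path
 * bookkeeping around the same RCU list head (paraphrased). */
struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);

if (cl)
	result = tcf_classify(skb, cl, &res, false);
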
a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 43a3a10b3c81..f143b7bbaa0d 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -32,6 +32,7 @@ struct multiq_sched_data { u16 max_bands; u16 curband; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct Qdisc **queues; }; @@ -46,11 +47,12 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - err = tc_classify(skb, fl, &res, false); + err = tcf_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -170,7 +172,7 @@ multiq_destroy(struct Qdisc *sch) int band; struct multiq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (band = 0; band < q->bands; band++) qdisc_destroy(q->queues[band]); @@ -243,6 +245,10 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + q->max_bands = qdisc_dev(sch)->num_tx_queues; q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL); @@ -367,14 +373,13 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl) { struct multiq_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static const struct Qdisc_class_ops multiq_class_ops = { @@ -383,7 +388,7 @@ static const struct Qdisc_class_ops multiq_class_ops = { .get = multiq_get, .put = multiq_put, .walk = multiq_walk, - .tcf_chain = multiq_find_tcf, + .tcf_block = multiq_tcf_block, .bind_tcf = multiq_bind, .unbind_tcf = multiq_put, .dump = multiq_dump_class, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 92c2e6d448d7..e3e364cc9a70 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -25,6 +25,7 @@ struct prio_sched_data { int bands; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; }; @@ -42,11 +43,12 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); - err = tc_classify(skb, fl, &res, false); + err = tcf_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -145,7 +147,7 @@ prio_destroy(struct Qdisc *sch) int prio; struct prio_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -204,9 +206,16 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) static int prio_init(struct Qdisc *sch, struct nlattr *opt) { + struct prio_sched_data *q = qdisc_priv(sch); + int err; + if (!opt) return -EINVAL; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + return prio_tune(sch, opt); } @@ -317,14 +326,13 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc 
*sch, - unsigned long cl) +static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl) { struct prio_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static const struct Qdisc_class_ops prio_class_ops = { @@ -333,7 +341,7 @@ static const struct Qdisc_class_ops prio_class_ops = { .get = prio_get, .put = prio_put, .walk = prio_walk, - .tcf_chain = prio_find_tcf, + .tcf_block = prio_tcf_block, .bind_tcf = prio_bind, .unbind_tcf = prio_put, .dump = prio_dump_class, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 041eba3006cc..0e16dfda0bd7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -182,6 +182,7 @@ struct qfq_group { struct qfq_sched { struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct Qdisc_class_hash clhash; u64 oldV, V; /* Precise virtual times. */ @@ -582,15 +583,14 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg) qfq_destroy_class(sch, cl); } -static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl) { struct qfq_sched *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, @@ -720,12 +720,13 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res, false); + result = tcf_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -1438,6 +1439,10 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) int i, j, err; u32 max_cl_shift, maxbudg_shift, max_classes; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; @@ -1492,7 +1497,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) struct hlist_node *next; unsigned int i; - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -1508,7 +1513,7 @@ static const struct Qdisc_class_ops qfq_class_ops = { .delete = qfq_delete_class, .get = qfq_get_class, .put = qfq_put_class, - .tcf_chain = qfq_tcf_chain, + .tcf_block = qfq_tcf_block, .bind_tcf = qfq_bind_tcf, .unbind_tcf = qfq_unbind_tcf, .graft = qfq_graft_class, diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0f777273ba29..11fb6ec878d6 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -56,6 +56,7 @@ struct sfb_bins { struct sfb_sched_data { struct Qdisc *qdisc; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; unsigned long rehash_interval; unsigned long warmup_time; /* double buffering warmup time in jiffies */ u32 max; @@ -259,12 +260,13 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, struct tcf_result res; int result; - result = tc_classify(skb, fl, &res, false); + result = tcf_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return false; @@ -465,7 +467,7 @@ static void sfb_destroy(struct 
Qdisc *sch) { struct sfb_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); qdisc_destroy(q->qdisc); } @@ -549,6 +551,11 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) static int sfb_init(struct Qdisc *sch, struct nlattr *opt) { struct sfb_sched_data *q = qdisc_priv(sch); + int err; + + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; q->qdisc = &noop_qdisc; return sfb_change(sch, opt); @@ -657,14 +664,13 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl) { struct sfb_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent, @@ -682,7 +688,7 @@ static const struct Qdisc_class_ops sfb_class_ops = { .change = sfb_change_class, .delete = sfb_delete, .walk = sfb_walk, - .tcf_chain = sfb_find_tcf, + .tcf_block = sfb_tcf_block, .bind_tcf = sfb_bind, .unbind_tcf = sfb_put, .dump = sfb_dump_class, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 332d94be6e1c..f80ea2cc5f1f 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -126,6 +126,7 @@ struct sfq_sched_data { u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct tcf_proto __rcu *filter_list; + struct tcf_block *block; sfq_index *ht; /* Hash table ('divisor' slots) */ struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ @@ -180,12 +181,13 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return sfq_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, fl, &res, false); + result = tcf_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return 0; @@ -697,7 +699,7 @@ static void sfq_destroy(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); sfq_free(q->ht); @@ -709,6 +711,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); int i; + int err; + + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, (unsigned long)sch); @@ -815,14 +822,13 @@ static void sfq_put(struct Qdisc *q, unsigned long cl) { } -static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl) { struct sfq_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static int sfq_dump_class(struct Qdisc *sch, unsigned long cl, @@ -878,7 +884,7 @@ static const struct Qdisc_class_ops sfq_class_ops = { .leaf = sfq_leaf, .get = sfq_get, .put = sfq_put, - .tcf_chain = sfq_find_tcf, + .tcf_block = sfq_tcf_block, .bind_tcf = sfq_bind, .unbind_tcf = sfq_put, .dump = sfq_dump_class, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 95238284c422..72b07dd9b959 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -246,7 +246,8 @@ static struct sctp_association *sctp_association_init(struct 
sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - if (sctp_stream_new(asoc, gfp)) + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, + 0, gfp)) goto fail_init; /* Assume that peer would support both address types unless we are @@ -291,7 +292,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; stream_free: - sctp_stream_free(asoc->stream); + sctp_stream_free(&asoc->stream); fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); @@ -365,7 +366,7 @@ void sctp_association_free(struct sctp_association *asoc) sctp_tsnmap_free(&asoc->peer.tsn_map); /* Free stream information. */ - sctp_stream_free(asoc->stream); + sctp_stream_free(&asoc->stream); if (asoc->strreset_chunk) sctp_chunk_free(asoc->strreset_chunk); @@ -1151,7 +1152,7 @@ void sctp_assoc_update(struct sctp_association *asoc, /* Reinitialize SSN for both local streams * and peer's streams. */ - sctp_stream_clear(asoc->stream); + sctp_stream_clear(&asoc->stream); /* Flush the ULP reassembly and ordered queue. * Any data there will now be stale and will @@ -1177,18 +1178,11 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->ctsn_ack_point = asoc->next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; - if (sctp_state(asoc, COOKIE_WAIT)) { - sctp_stream_free(asoc->stream); - asoc->stream = new->stream; - new->stream = NULL; - } + if (sctp_state(asoc, COOKIE_WAIT)) + sctp_stream_update(&asoc->stream, &new->stream); - if (!asoc->assoc_id) { - /* get a new association id since we don't have one - * yet. - */ - sctp_assoc_set_id(asoc, GFP_ATOMIC); - } + /* get a new assoc id if we don't have one yet. */ + sctp_assoc_set_id(asoc, GFP_ATOMIC); } /* SCTP-AUTH: Save the peer parameters from the new associations diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 697721a7a3f1..81466f6442e8 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -307,7 +307,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { struct sctp_stream_out *streamout = - &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; @@ -320,7 +320,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { struct sctp_stream_out *streamout = - &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 4f5a2b580aa5..275925b93b29 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -35,6 +35,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; + skb->csum_not_inet = 0; return sctp_compute_cksum(skb, skb_transport_offset(skb)); } @@ -98,6 +99,11 @@ static const struct net_offload sctp6_offload = { }, }; +static const struct skb_checksum_ops crc32c_csum_ops = { + .update = sctp_csum_update, + .combine = sctp_csum_combine, +}; + int __init sctp_offload_init(void) { int ret; @@ -110,6 +116,7 @@ int __init sctp_offload_init(void) if (ret) goto ipv4; + crc32c_csum_stub = &crc32c_csum_ops; return ret; ipv4: diff --git a/net/sctp/output.c b/net/sctp/output.c index 
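
The offload.c hunks above are half of a CRC32c checksum-offload story: sctp_gso_make_checksum() clears the new csum_not_inet flag once it has computed the sum, and a struct skb_checksum_ops built from sctp_csum_update/sctp_csum_combine is registered as crc32c_csum_stub at init. The output.c hunk just below sets csum_not_inet together with CHECKSUM_PARTIAL, which is the producer side. The consumer would resolve such skbs roughly like this (sketch; skb_crc32c_csum_help is the core helper from the same patch series and is an assumption here, it does not appear in this diff):

/* Sketch of the consumer side: a CHECKSUM_PARTIAL skb flagged
 * csum_not_inet is finished with CRC32c (via crc32c_csum_stub)
 * instead of the Internet checksum. */
if (skb->ip_summed == CHECKSUM_PARTIAL && skb->csum_not_inet)
	err = skb_crc32c_csum_help(skb);
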
1409a875ad8e..e2edf2ebbade 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -538,6 +538,7 @@ merge: } else { chksum: head->ip_summed = CHECKSUM_PARTIAL; + head->csum_not_inet = 1; head->csum_start = skb_transport_header(head) - head->head; head->csum_offset = offsetof(struct sctphdr, checksum); } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index fe4c3d462f6e..20299df163b9 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -363,7 +363,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); - streamout = &asoc->stream->out[chk->sinfo.sinfo_stream]; + streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; @@ -400,9 +400,9 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; - if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) { + if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { struct sctp_stream_out *streamout = - &asoc->stream->out[chk->sinfo.sinfo_stream]; + &asoc->stream.out[chk->sinfo.sinfo_stream]; streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; } @@ -1036,7 +1036,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ - if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) { + if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) { /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); @@ -1054,7 +1054,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) continue; } - if (asoc->stream->out[sid].state == SCTP_STREAM_CLOSED) { + if (asoc->stream.out[sid].state == SCTP_STREAM_CLOSED) { sctp_outq_head_data(q, chunk); goto sctp_flush_out; } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index a0b29d43627f..8e34db56bc1d 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -218,8 +218,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) return -ENOMEM; head = &sctp_ep_hashtable[hash]; - local_bh_disable(); - read_lock(&head->lock); + read_lock_bh(&head->lock); sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; @@ -234,8 +233,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "\n"); } - read_unlock(&head->lock); - local_bh_enable(); + read_unlock_bh(&head->lock); return 0; } @@ -361,8 +359,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", - assoc->hbinterval, assoc->stream->incnt, - assoc->stream->outcnt, assoc->max_retrans, + assoc->hbinterval, assoc->stream.incnt, + assoc->stream.outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, atomic_read(&sk->sk_wmem_alloc), diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 92e332e17391..ea2601501654 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1296,8 +1296,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t), &auth_hdr); - hmac = skb_put(retval->skb, hmac_desc->hmac_len); - memset(hmac, 0, hmac_desc->hmac_len); + hmac = 
skb_put_zero(retval->skb, hmac_desc->hmac_len); /* Adjust the chunk header to include the empty MAC */ retval->chunk_hdr->length = @@ -1544,7 +1543,7 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk) /* All fragments will be on the same stream */ sid = ntohs(chunk->subh.data_hdr->stream); - stream = chunk->asoc->stream; + stream = &chunk->asoc->stream; /* Now assign the sequence number to the entire message. * All fragments must have the same stream sequence number. @@ -2454,7 +2453,8 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * stream sequence number shall be set to 0. */ - if (sctp_stream_init(asoc, gfp)) + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, + asoc->c.sinit_max_instreams, gfp)) goto clean_up; if (!asoc->temp && sctp_assoc_set_id(asoc, gfp)) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f863b5573e42..df73190da761 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3958,7 +3958,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->stream->incnt) + if (ntohs(skip->stream) >= asoc->stream.incnt) goto discard_noforce; } @@ -4029,7 +4029,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->stream->incnt) + if (ntohs(skip->stream) >= asoc->stream.incnt) goto gen_shutdown; } @@ -6365,7 +6365,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ sid = ntohs(data_hdr->stream); - if (sid >= asoc->stream->incnt) { + if (sid >= asoc->stream.incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); @@ -6387,7 +6387,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and is invalid. */ ssn = ntohs(data_hdr->ssn); - if (ordered && SSN_lt(ssn, sctp_ssn_peek(asoc->stream, in, sid))) + if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->stream, in, sid))) return SCTP_IERROR_PROTO_VIOLATION; /* Send the data up to the user. Note: Schedule the diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 30aa0a529215..039a93175adf 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -103,7 +103,7 @@ static int sctp_autobind(struct sock *sk); static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); -static int sctp_memory_pressure; +static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; struct percpu_counter sctp_sockets_allocated; @@ -1494,7 +1494,7 @@ static void sctp_close(struct sock *sk, long timeout) pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sk->sk_shutdown = SHUTDOWN_MASK; sk->sk_state = SCTP_SS_CLOSING; @@ -1544,7 +1544,7 @@ static void sctp_close(struct sock *sk, long timeout) * held and that should be grabbed before socket lock. */ spin_lock_bh(&net->sctp.addr_wq_lock); - bh_lock_sock(sk); + bh_lock_sock_nested(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. @@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } /* Check for invalid stream. 
*/ - if (sinfo->sinfo_stream >= asoc->stream->outcnt) { + if (sinfo->sinfo_stream >= asoc->stream.outcnt) { err = -EINVAL; goto out_free; } @@ -4497,8 +4497,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - info->sctpi_instrms = asoc->stream->incnt; - info->sctpi_outstrms = asoc->stream->outcnt; + info->sctpi_instrms = asoc->stream.incnt; + info->sctpi_outstrms = asoc->stream.outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) @@ -4727,8 +4727,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - status.sstat_instrms = asoc->stream->incnt; - status.sstat_outstrms = asoc->stream->outcnt; + status.sstat_instrms = asoc->stream.incnt; + status.sstat_outstrms = asoc->stream.outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, @@ -6600,10 +6600,10 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); - if (!asoc || params.sprstat_sid >= asoc->stream->outcnt) + if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) goto out; - streamout = &asoc->stream->out[params.sprstat_sid]; + streamout = &asoc->stream.out[params.sprstat_sid]; if (policy == SCTP_PR_SCTP_NONE) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index dda53a293986..82e6d40052a8 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,70 +35,43 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp) +int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + gfp_t gfp) { - struct sctp_stream *stream; - int i; - - stream = kzalloc(sizeof(*stream), gfp); - if (!stream) - return -ENOMEM; - - stream->outcnt = asoc->c.sinit_num_ostreams; - stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); - if (!stream->out) { - kfree(stream); - return -ENOMEM; - } - for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_OPEN; - - asoc->stream = stream; - - return 0; -} - -int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp) -{ - struct sctp_stream *stream = asoc->stream; int i; /* Initial stream->out size may be very big, so free it and alloc * a new one with new outcnt to save memory. 
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index dda53a293986..82e6d40052a8 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -35,70 +35,43 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>

-int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp)
+int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+		     gfp_t gfp)
 {
-	struct sctp_stream *stream;
-	int i;
-
-	stream = kzalloc(sizeof(*stream), gfp);
-	if (!stream)
-		return -ENOMEM;
-
-	stream->outcnt = asoc->c.sinit_num_ostreams;
-	stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
-	if (!stream->out) {
-		kfree(stream);
-		return -ENOMEM;
-	}
-	for (i = 0; i < stream->outcnt; i++)
-		stream->out[i].state = SCTP_STREAM_OPEN;
-
-	asoc->stream = stream;
-
-	return 0;
-}
-
-int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp)
-{
-	struct sctp_stream *stream = asoc->stream;
 	int i;

 	/* Initial stream->out size may be very big, so free it and alloc
 	 * a new one with new outcnt to save memory.
 	 */
 	kfree(stream->out);
-	stream->outcnt = asoc->c.sinit_num_ostreams;
-	stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
+
+	stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
 	if (!stream->out)
-		goto nomem;
+		return -ENOMEM;
+	stream->outcnt = outcnt;
 	for (i = 0; i < stream->outcnt; i++)
 		stream->out[i].state = SCTP_STREAM_OPEN;

-	stream->incnt = asoc->c.sinit_max_instreams;
-	stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp);
+	if (!incnt)
+		return 0;
+
+	stream->in = kcalloc(incnt, sizeof(*stream->in), gfp);
 	if (!stream->in) {
 		kfree(stream->out);
-		goto nomem;
+		stream->out = NULL;
+		return -ENOMEM;
 	}

-	return 0;
-
-nomem:
-	asoc->stream = NULL;
-	kfree(stream);
+	stream->incnt = incnt;

-	return -ENOMEM;
+	return 0;
 }

 void sctp_stream_free(struct sctp_stream *stream)
 {
-	if (unlikely(!stream))
-		return;
-
 	kfree(stream->out);
 	kfree(stream->in);
-	kfree(stream);
 }

 void sctp_stream_clear(struct sctp_stream *stream)
@@ -112,6 +85,19 @@ void sctp_stream_clear(struct sctp_stream *stream)
 		stream->in[i].ssn = 0;
 }

+void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
+{
+	sctp_stream_free(stream);
+
+	stream->out = new->out;
+	stream->in = new->in;
+	stream->outcnt = new->outcnt;
+	stream->incnt = new->incnt;
+
+	new->out = NULL;
+	new->in = NULL;
+}
+
 static int sctp_send_reconf(struct sctp_association *asoc,
 			    struct sctp_chunk *chunk)
 {
@@ -128,7 +114,7 @@ static int sctp_send_reconf(struct sctp_association *asoc,
 int sctp_send_reset_streams(struct sctp_association *asoc,
 			    struct sctp_reset_streams *params)
 {
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u16 i, str_nums, *str_list;
 	struct sctp_chunk *chunk;
 	int retval = -EINVAL;
@@ -214,6 +200,7 @@ out:

 int sctp_send_reset_assoc(struct sctp_association *asoc)
 {
+	struct sctp_stream *stream = &asoc->stream;
 	struct sctp_chunk *chunk = NULL;
 	int retval;
 	__u16 i;
@@ -230,8 +217,8 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
 		return -ENOMEM;

 	/* Block further xmit of data until this request is completed */
-	for (i = 0; i < asoc->stream->outcnt; i++)
-		asoc->stream->out[i].state = SCTP_STREAM_CLOSED;
+	for (i = 0; i < stream->outcnt; i++)
+		stream->out[i].state = SCTP_STREAM_CLOSED;

 	asoc->strreset_chunk = chunk;
 	sctp_chunk_hold(asoc->strreset_chunk);
@@ -241,8 +228,8 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
 		sctp_chunk_put(asoc->strreset_chunk);
 		asoc->strreset_chunk = NULL;

-		for (i = 0; i < asoc->stream->outcnt; i++)
-			asoc->stream->out[i].state = SCTP_STREAM_OPEN;
+		for (i = 0; i < stream->outcnt; i++)
+			stream->out[i].state = SCTP_STREAM_OPEN;

 		return retval;
 	}
@@ -255,7 +242,7 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
 int sctp_send_add_streams(struct sctp_association *asoc,
 			  struct sctp_add_streams *params)
 {
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	struct sctp_chunk *chunk = NULL;
 	int retval = -ENOMEM;
 	__u32 outcnt, incnt;
@@ -357,7 +344,7 @@ struct sctp_chunk *sctp_process_strreset_outreq(
 	struct sctp_ulpevent **evp)
 {
 	struct sctp_strreset_outreq *outreq = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u16 i, nums, flags = 0, *str_p = NULL;
 	__u32 result = SCTP_STRRESET_DENIED;
 	__u32 request_seq;
@@ -449,7 +436,7 @@ struct sctp_chunk *sctp_process_strreset_inreq(
 	struct sctp_ulpevent **evp)
 {
 	struct sctp_strreset_inreq *inreq = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	struct sctp_chunk *chunk = NULL;
 	__u16 i, nums, *str_p;
@@ -523,7 +510,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
 {
 	__u32 init_tsn = 0, next_tsn = 0, max_tsn_seen;
 	struct sctp_strreset_tsnreq *tsnreq = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	__u32 request_seq;
 	__u16 i;
@@ -612,7 +599,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
 	struct sctp_ulpevent **evp)
 {
 	struct sctp_strreset_addstrm *addstrm = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	struct sctp_stream_in *streamin;
 	__u32 request_seq, incnt;
@@ -687,7 +674,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
 	struct sctp_ulpevent **evp)
 {
 	struct sctp_strreset_addstrm *addstrm = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	struct sctp_stream_out *streamout;
 	struct sctp_chunk *chunk = NULL;
@@ -758,8 +745,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
 	union sctp_params param,
 	struct sctp_ulpevent **evp)
 {
+	struct sctp_stream *stream = &asoc->stream;
 	struct sctp_strreset_resp *resp = param.v;
-	struct sctp_stream *stream = asoc->stream;
 	struct sctp_transport *t;
 	__u16 i, nums, flags = 0;
 	sctp_paramhdr_t *req;
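
The new sctp_stream_update() is move semantics spelled out in C: the receiver frees what it owns, adopts the donor's arrays, and NULLs the donor so a later sctp_stream_free() on it cannot double-free. A self-contained sketch of the pattern with simplified types:

#include <stdlib.h>

struct demo_stream {
	void *out, *in;
	unsigned short outcnt, incnt;
};

/* Adopt the arrays of `new` and clear the donor so that freeing `new`
 * afterwards cannot release what `stream` now owns.
 */
static void demo_stream_update(struct demo_stream *stream,
			       struct demo_stream *new)
{
	free(stream->out);
	free(stream->in);

	stream->out = new->out;
	stream->in = new->in;
	stream->outcnt = new->outcnt;
	stream->incnt = new->incnt;

	new->out = NULL;
	new->in = NULL;
}
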
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index aa3624d50278..25f7e4140566 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -764,7 +764,7 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
 	__u16 sid, csid, cssn;

 	sid = event->stream;
-	stream = ulpq->asoc->stream;
+	stream = &ulpq->asoc->stream;

 	event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;

@@ -858,7 +858,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
 	/* Note: The stream ID must be verified before this routine. */
 	sid = event->stream;
 	ssn = event->ssn;
-	stream = ulpq->asoc->stream;
+	stream = &ulpq->asoc->stream;

 	/* Is this the expected SSN for this stream ID? */
 	if (ssn != sctp_ssn_peek(stream, in, sid)) {
@@ -893,7 +893,7 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
 	struct sk_buff_head *lobby = &ulpq->lobby;
 	__u16 csid, cssn;

-	stream = ulpq->asoc->stream;
+	stream = &ulpq->asoc->stream;

 	/* We are holding the chunks by stream, by SSN. */
 	skb_queue_head_init(&temp);
@@ -958,7 +958,7 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
 	struct sctp_stream *stream;

 	/* Note: The stream ID must be verified before this routine. */
-	stream = ulpq->asoc->stream;
+	stream = &ulpq->asoc->stream;

 	/* Is this an old SSN? If so ignore. */
 	if (SSN_lt(ssn, sctp_ssn_peek(stream, in, sid)))
diff --git a/net/socket.c b/net/socket.c
index c2564eb25c6b..8f9dab330d57 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -461,7 +461,7 @@ EXPORT_SYMBOL(sock_from_file);
 *	@err: pointer to an error code return
 *
 *	The file handle passed in is locked and the socket it is bound
- *	too is returned. If an error occurs the err pointer is overwritten
+ *	to is returned. If an error occurs the err pointer is overwritten
 *	with a negative errno code and NULL is returned. The function checks
 *	for both invalid handles and passing a handle which is not a socket.
 *
@@ -662,6 +662,40 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
 	return skb->pkt_type == PACKET_OUTGOING;
 }

+/* On transmit, software and hardware timestamps are returned independently.
+ * As the two skb clones share the hardware timestamp, which may be updated
+ * before the software timestamp is received, a hardware TX timestamp may be
+ * returned only if there is no software TX timestamp. Ignore false software
+ * timestamps, which may be made in the __sock_recv_timestamp() call when the
+ * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
+ * hardware timestamp.
+ */
+static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
+{
+	return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
+}
+
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct scm_ts_pktinfo ts_pktinfo;
+	struct net_device *orig_dev;
+
+	if (!skb_mac_header_was_set(skb))
+		return;
+
+	memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
+
+	rcu_read_lock();
+	orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+	if (orig_dev)
+		ts_pktinfo.if_index = orig_dev->ifindex;
+	rcu_read_unlock();
+
+	ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+	put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+		 sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
 /*
 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
 */
@@ -670,14 +704,16 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 {
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
 	struct scm_timestamping tss;
-	int empty = 1;
+	int empty = 1, false_tstamp = 0;
 	struct skb_shared_hwtstamps *shhwtstamps =
 		skb_hwtstamps(skb);

 	/* Race occurred between timestamp enabling and packet
 	   receiving. Fill in the current time for now. */
-	if (need_software_tstamp && skb->tstamp == 0)
+	if (need_software_tstamp && skb->tstamp == 0) {
 		__net_timestamp(skb);
+		false_tstamp = 1;
+	}

 	if (need_software_tstamp) {
 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
@@ -699,8 +735,13 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 		empty = 0;
 	if (shhwtstamps &&
 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
-	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
+	    !skb_is_swtx_tstamp(skb, false_tstamp) &&
+	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
 		empty = 0;
+		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+		    !skb_is_err_queue(skb))
+			put_ts_pktinfo(msg, skb);
+	}
 	if (!empty) {
 		put_cmsg(msg, SOL_SOCKET,
 			 SCM_TIMESTAMPING, sizeof(tss), &tss);
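
The net/socket.c hunks do two things: skb_is_swtx_tstamp() suppresses a hardware TX timestamp when a (possibly freshly fabricated) software timestamp is present, and put_ts_pktinfo() attaches the receiving interface index and layer-2 packet length as a new SCM_TIMESTAMPING_PKTINFO control message when SOF_TIMESTAMPING_OPT_PKTINFO is set. A hedged sketch of the consumer side; struct scm_ts_pktinfo and the SCM_TIMESTAMPING_PKTINFO constant come from the uapi headers this series introduces, so older headers will not have them:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>

/* Walk the cmsgs attached to a recvmsg() result (typically taken off the
 * error queue with MSG_ERRQUEUE for TX timestamps).
 */
static void dump_rx_tstamp_cmsgs(struct msghdr *msg)
{
	struct cmsghdr *cm;

	for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
		if (cm->cmsg_level != SOL_SOCKET)
			continue;
		if (cm->cmsg_type == SCM_TIMESTAMPING) {
			struct scm_timestamping *tss =
				(struct scm_timestamping *)CMSG_DATA(cm);

			/* ts[0] is the software stamp, ts[2] raw hardware */
			printf("sw %lld.%09ld hw %lld.%09ld\n",
			       (long long)tss->ts[0].tv_sec, tss->ts[0].tv_nsec,
			       (long long)tss->ts[2].tv_sec, tss->ts[2].tv_nsec);
		} else if (cm->cmsg_type == SCM_TIMESTAMPING_PKTINFO) {
			struct scm_ts_pktinfo *pi =
				(struct scm_ts_pktinfo *)CMSG_DATA(cm);

			printf("ifindex %u, L2 length %u\n",
			       pi->if_index, pi->pkt_length);
		}
	}
}
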
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8d40a7d31c99..25dc67ef9d37 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -571,24 +571,17 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);

-static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
+static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);

 /**
 *	register_switchdev_notifier - Register notifier
 *	@nb: notifier_block
 *
- *	Register switch device notifier. This should be used by code
- *	which needs to monitor events happening in particular device.
- *	Return values are same as for atomic_notifier_chain_register().
+ *	Register switch device notifier.
 */
 int register_switchdev_notifier(struct notifier_block *nb)
 {
-	int err;
-
-	rtnl_lock();
-	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
-	rtnl_unlock();
-	return err;
+	return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
 }
 EXPORT_SYMBOL_GPL(register_switchdev_notifier);

@@ -597,16 +590,10 @@ EXPORT_SYMBOL_GPL(register_switchdev_notifier);
 *	@nb: notifier_block
 *
 *	Unregister switch device notifier.
- *	Return values are same as for atomic_notifier_chain_unregister().
 */
 int unregister_switchdev_notifier(struct notifier_block *nb)
 {
-	int err;
-
-	rtnl_lock();
-	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
-	rtnl_unlock();
-	return err;
+	return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
 }
 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);

@@ -616,18 +603,13 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
 *	@dev: port device
 *	@info: notifier information data
 *
- *	Call all network notifier blocks. This should be called by driver
- *	when it needs to propagate hardware event.
- *	Return values are same as for atomic_notifier_call_chain().
- *	rtnl_lock must be held.
+ *	Call all network notifier blocks.
 */
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
 			     struct switchdev_notifier_info *info)
 {
-	ASSERT_RTNL();
-
 	info->dev = dev;
-	return raw_notifier_call_chain(&switchdev_notif_chain, val, info);
+	return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
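
Converting switchdev_notif_chain from a raw to an atomic notifier chain removes the RTNL requirement from registration and from call_switchdev_notifiers(), so events (for instance FDB learning signalled from a driver's interrupt path) can be raised from atomic context; the flip side is that handlers now run under RCU and must not sleep. An illustrative kernel-style fragment, not buildable stand-alone; demo_* names are stand-ins:

static int demo_switchdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct switchdev_notifier_info *info = ptr;

	/* atomic context: no sleeping allocations or blocking locks here */
	pr_debug("switchdev event %lu on %s\n", event, info->dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block demo_nb = {
	.notifier_call = demo_switchdev_event,
};

/* register_switchdev_notifier(&demo_nb) no longer takes rtnl_lock() */
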
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 83ea164f16b3..7b33e8c366bc 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -711,6 +711,11 @@ int wiphy_register(struct wiphy *wiphy)
 	    (wiphy->bss_select_support & ~(BIT(__NL80211_BSS_SELECT_ATTR_AFTER_LAST) - 2))))
 		return -EINVAL;

+	if (WARN_ON(wiphy_ext_feature_isset(&rdev->wiphy,
+					    NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X) &&
+		    (!rdev->ops->set_pmk || !rdev->ops->del_pmk)))
+		return -EINVAL;
+
 	if (wiphy->addresses)
 		memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN);

diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index ec0b1c20ac99..421a6b80ec62 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -174,6 +174,14 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 						   scan_width);
 	}

+	err = cfg80211_chandef_dfs_required(&rdev->wiphy,
+					    &setup->chandef,
+					    NL80211_IFTYPE_MESH_POINT);
+	if (err < 0)
+		return err;
+	if (err > 0 && !setup->userspace_handles_dfs)
+		return -EINVAL;
+
 	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef,
 				     NL80211_IFTYPE_MESH_POINT))
 		return -EINVAL;
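
cfg80211_chandef_dfs_required() is tri-state: a negative errno on failure, 0 when no radar detection is needed, and a positive value when the channel definition requires DFS. The mesh hunk above wires that into __cfg80211_join_mesh() so joining a DFS channel is refused unless userspace declared it will handle radar events. A kernel-style sketch of the resulting caller pattern (demo_check_dfs is a stand-in):

static int demo_check_dfs(struct wiphy *wiphy,
			  struct cfg80211_chan_def *chandef,
			  bool userspace_handles_dfs)
{
	int err = cfg80211_chandef_dfs_required(wiphy, chandef,
						NL80211_IFTYPE_MESH_POINT);

	if (err < 0)
		return err;		/* lookup failed */
	if (err > 0 && !userspace_handles_dfs)
		return -EINVAL;		/* nobody would act on radar events */
	return 0;
}
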
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c3bc9da30cff..5487cd775b6f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7501,6 +7501,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 	static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1];
 	int err;
 	bool need_new_beacon = false;
+	bool need_handle_dfs_flag = true;
 	int len, i;
 	u32 cs_count;

@@ -7512,6 +7513,12 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
 		need_new_beacon = true;
+		/* For all modes except AP the handle_dfs flag needs to be
+		 * supplied to tell the kernel that userspace will handle radar
+		 * events when they happen. Otherwise a switch to a channel
+		 * requiring DFS will be rejected.
+		 */
+		need_handle_dfs_flag = false;

 		/* useless if AP is not running */
 		if (!wdev->beacon_interval)
@@ -7634,8 +7641,13 @@ skip_beacons:
 	if (err < 0)
 		return err;

-	if (err > 0)
+	if (err > 0) {
 		params.radar_required = true;
+		if (need_handle_dfs_flag &&
+		    !nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS])) {
+			return -EINVAL;
+		}
+	}

 	if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
 		params.block_tx = true;
@@ -8156,6 +8168,15 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 		memcpy(settings->akm_suites, data, len);
 	}

+	if (info->attrs[NL80211_ATTR_PMK]) {
+		if (nla_len(info->attrs[NL80211_ATTR_PMK]) != WLAN_PMK_LEN)
+			return -EINVAL;
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK))
+			return -EINVAL;
+		settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]);
+	}
+
 	return 0;
 }

@@ -8860,6 +8881,12 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)

 	connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];

+	if (info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS] &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+		return -EINVAL;
+	connect.want_1x = info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS];
+
 	err = nl80211_crypto_settings(rdev, info, &connect.crypto,
 				      NL80211_MAX_NR_CIPHER_SUITES);
 	if (err)
@@ -9962,6 +9989,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 			return err;
 	}

+	setup.userspace_handles_dfs =
+		nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
+
 	return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
 }

@@ -12241,6 +12271,90 @@ static int nl80211_set_multicast_to_unicast(struct sk_buff *skb,
 	return rdev_set_multicast_to_unicast(rdev, dev, enabled);
 }

+static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_pmk_conf pmk_conf = {};
+	int ret;
+
+	if (wdev->iftype != NL80211_IFTYPE_STATION &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
+
+	if (!wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK])
+		return -EINVAL;
+
+	wdev_lock(wdev);
+	if (!wdev->current_bss) {
+		ret = -ENOTCONN;
+		goto out;
+	}
+
+	pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	if (memcmp(pmk_conf.aa, wdev->current_bss->pub.bssid, ETH_ALEN)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
+	pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
+	if (pmk_conf.pmk_len != WLAN_PMK_LEN &&
+	    pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (info->attrs[NL80211_ATTR_PMKR0_NAME]) {
+		int r0_name_len = nla_len(info->attrs[NL80211_ATTR_PMKR0_NAME]);
+
+		if (r0_name_len != WLAN_PMK_NAME_LEN) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		pmk_conf.pmk_r0_name =
+			nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]);
+	}
+
+	ret = rdev_set_pmk(rdev, dev, &pmk_conf);
+out:
+	wdev_unlock(wdev);
+	return ret;
+}
+
+static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	const u8 *aa;
+	int ret;
+
+	if (wdev->iftype != NL80211_IFTYPE_STATION &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
+
+	if (!wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	wdev_lock(wdev);
+	aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	ret = rdev_del_pmk(rdev, dev, aa);
+	wdev_unlock(wdev);
+
+	return ret;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -13116,6 +13230,21 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_SET_PMK,
+		.doit = nl80211_set_pmk,
+		.policy = nl80211_policy,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_PMK,
+		.doit = nl80211_del_pmk,
+		.policy = nl80211_policy,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+
 };

 static struct genl_family nl80211_fam __ro_after_init = {
@@ -13671,7 +13800,9 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 		     info->req_ie)) ||
 	    (info->resp_ie &&
 	     nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
-		     info->resp_ie)))
+		     info->resp_ie)) ||
+	    (info->authorized &&
+	     nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
 		goto nla_put_failure;

 	genlmsg_end(msg, hdr);
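
nl80211_set_pmk()/nl80211_del_pmk() give userspace (typically wpa_supplicant) a way to hand the PMK to a driver that offloads the 802.1X 4-way handshake, and the wiphy_register() hunk earlier refuses registration when a driver advertises NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X without supplying both ops. A hedged driver-side sketch (illustrative only; demo_* names are stand-ins, signatures mirror how the rdev-ops wrappers below invoke the ops):

static int demo_set_pmk(struct wiphy *wiphy, struct net_device *dev,
			const struct cfg80211_pmk_conf *conf)
{
	/* program conf->pmk (conf->pmk_len bytes) for the authenticator
	 * address conf->aa into the device's key store
	 */
	return 0;
}

static int demo_del_pmk(struct wiphy *wiphy, struct net_device *dev,
			const u8 *aa)
{
	/* forget the PMK installed for authenticator address @aa */
	return 0;
}

static const struct cfg80211_ops demo_ops = {
	.set_pmk = demo_set_pmk,
	.del_pmk = demo_del_pmk,
	/* ... */
};

/* at init time, before wiphy_register():
 *	wiphy_ext_feature_set(wiphy,
 *			      NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X);
 */
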
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 0598c1e5d0ad..ce23d7d49960 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1164,4 +1164,29 @@ rdev_set_coalesce(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
+
+static inline int rdev_set_pmk(struct cfg80211_registered_device *rdev,
+			       struct net_device *dev,
+			       struct cfg80211_pmk_conf *pmk_conf)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_set_pmk(&rdev->wiphy, dev, pmk_conf);
+	if (rdev->ops->set_pmk)
+		ret = rdev->ops->set_pmk(&rdev->wiphy, dev, pmk_conf);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev,
+			       struct net_device *dev, const u8 *aa)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_del_pmk(&rdev->wiphy, dev, aa);
+	if (rdev->ops->del_pmk)
+		ret = rdev->ops->del_pmk(&rdev->wiphy, dev, aa);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 532a0007ce82..0a49b88070d0 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -960,6 +960,7 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
 	ev->rm.resp_ie_len = info->resp_ie_len;
 	memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
 	ev->rm.bss = info->bss;
+	ev->rm.authorized = info->authorized;

 	spin_lock_irqsave(&wdev->event_lock, flags);
 	list_add_tail(&ev->list, &wdev->event_list);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index ca8b2059f92c..0f8db41eaddb 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2258,6 +2258,66 @@ TRACE_EVENT(rdev_tdls_cancel_channel_switch,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr))
 );

+TRACE_EVENT(rdev_set_pmk,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_pmk_conf *pmk_conf),
+
+	TP_ARGS(wiphy, netdev, pmk_conf),
+
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(aa)
+		__field(u8, pmk_len)
+		__field(u8, pmk_r0_name_len)
+		__dynamic_array(u8, pmk, pmk_conf->pmk_len)
+		__dynamic_array(u8, pmk_r0_name, WLAN_PMK_NAME_LEN)
+	),
+
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(aa, pmk_conf->aa);
+		__entry->pmk_len = pmk_conf->pmk_len;
+		__entry->pmk_r0_name_len =
+			pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0;
+		memcpy(__get_dynamic_array(pmk), pmk_conf->pmk,
+		       pmk_conf->pmk_len);
+		memcpy(__get_dynamic_array(pmk_r0_name), pmk_conf->pmk_r0_name,
+		       pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0);
+	),
+
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT
+		  "pmk_len=%u, pmk: %s pmk_r0_name: %s", WIPHY_PR_ARG,
+		  NETDEV_PR_ARG, MAC_PR_ARG(aa), __entry->pmk_len,
+		  __print_array(__get_dynamic_array(pmk),
+				__get_dynamic_array_len(pmk), 1),
+		  __entry->pmk_r0_name_len ?
+			__print_array(__get_dynamic_array(pmk_r0_name),
+				      __get_dynamic_array_len(pmk_r0_name), 1) : "")
+);
+
+TRACE_EVENT(rdev_del_pmk,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *aa),
+
+	TP_ARGS(wiphy, netdev, aa),
+
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(aa)
+	),
+
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(aa, aa);
+	),
+
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
+);
+
 /*************************************************************
 *	     cfg80211 exported functions traces		     *
 *************************************************************/
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4992f1025c9d..96613fe2c6b1 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1219,8 +1219,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 	u32 bitrate;
 	int idx;

-	if (WARN_ON_ONCE(rate->mcs > 9))
-		return 0;
+	if (rate->mcs > 9)
+		goto warn;

 	switch (rate->bw) {
 	case RATE_INFO_BW_160:
@@ -1235,8 +1235,7 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 	case RATE_INFO_BW_5:
 	case RATE_INFO_BW_10:
 	default:
-		WARN_ON(1);
-		/* fall through */
+		goto warn;
 	case RATE_INFO_BW_20:
 		idx = 0;
 	}
@@ -1249,6 +1248,10 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)

 	/* do NOT round down here */
 	return (bitrate + 50000) / 100000;
+ warn:
+	WARN_ONCE(1, "invalid rate bw=%d, mcs=%d, nss=%d\n",
+		  rate->bw, rate->mcs, rate->nss);
+	return 0;
 }

 u32 cfg80211_calculate_bitrate(struct rate_info *rate)
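
The util.c change funnels every invalid-rate case in cfg80211_calculate_bitrate_vht() into a single WARN_ONCE() that prints the offending bw/mcs/nss tuple, replacing one silent bailout and one parameterless WARN_ON(). A generic kernel-style sketch of that pattern (demo_* names are stand-ins, the arithmetic is a placeholder):

static u32 demo_calculate(int bw, int mcs, int nss)
{
	if (mcs > 9)
		goto warn;
	if (bw < 0)
		goto warn;

	return (u32)(mcs * nss);	/* stand-in arithmetic */

warn:
	/* all validation failures report the full tuple, exactly once */
	WARN_ONCE(1, "invalid rate bw=%d, mcs=%d, nss=%d\n", bw, mcs, nss);
	return 0;
}
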