diff options
Diffstat (limited to 'net')
262 files changed, 5573 insertions, 4346 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9424f3718ea7..2fb2d88e8c2e 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -341,7 +341,7 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event) static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vlan_group *grp; struct vlan_info *vlan_info; int i, flgs; diff --git a/net/Kconfig b/net/Kconfig index 2ddc9046868e..51da83943847 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -218,6 +218,7 @@ source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" source "net/netlink/Kconfig" +source "net/mpls/Kconfig" config RPS boolean @@ -242,6 +243,10 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis +config NET_LL_RX_POLL + boolean + default y + config BQL boolean depends on SYSFS @@ -259,6 +264,18 @@ config BPF_JIT packet sniffing (libpcap/tcpdump). Note : Admin should enable this feature changing /proc/sys/net/core/bpf_jit_enable +config NET_FLOW_LIMIT + boolean + depends on RPS + default y + ---help--- + The network stack has to drop packets when a receive processing CPU's + backlog reaches netdev_max_backlog. If a few out of many active flows + generate the vast majority of load, drop their traffic earlier to + maintain capacity for the other flows. This feature provides servers + with many clients some protection against DoS by a single (spoofed) + flow that greatly exceeds average workload. + menu "Network testing" config NET_PKTGEN diff --git a/net/Makefile b/net/Makefile index 091e7b04f301..9492e8cb64e9 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ +obj-$(CONFIG_NET_MPLS_GSO) += mpls/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 173a2e82f486..690356fa52b9 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -332,7 +332,7 @@ static void aarp_expire_timeout(unsigned long unused) static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ct; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ef12839a7cfe..7fee50d637f9 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -644,7 +644,7 @@ static inline void atalk_dev_down(struct net_device *dev) static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/atm/clip.c b/net/atm/clip.c index 8ae3a7879335..8215f7cb170b 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -539,9 +539,9 @@ static int clip_create(int number) } static int clip_device_event(struct notifier_block *this, unsigned long event, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -575,6 +575,7 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, void *ifa) { struct in_device *in_dev; + struct netdev_notifier_info info; in_dev = ((struct in_ifaddr *)ifa)->ifa_dev; /* @@ -583,7 +584,8 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, */ if (event != NETDEV_UP) return NOTIFY_DONE; - return clip_device_event(this, NETDEV_CHANGE, in_dev->dev); + netdev_notifier_info_init(&info, in_dev->dev); + return clip_device_event(this, NETDEV_CHANGE, &info); } static struct notifier_block clip_dev_notifier = { diff --git a/net/atm/mpc.c b/net/atm/mpc.c index d4cc1be5c364..3af12755cd04 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -998,14 +998,12 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) } static int mpoa_event_listener(struct notifier_block *mpoa_notifier, - unsigned long event, void *dev_ptr) + unsigned long event, void *ptr) { - struct net_device *dev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpoa_client *mpc; struct lec_priv *priv; - dev = dev_ptr; - if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e277e38f736b..4b4d2b779ec1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -111,9 +111,9 @@ again: * Handle device status changes. */ static int ax25_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -1974,7 +1974,7 @@ static struct packet_type ax25_packet_type __read_mostly = { }; static struct notifier_block ax25_dev_notifier = { - .notifier_call =ax25_device_event, + .notifier_call = ax25_device_event, }; static int __init ax25_init(void) diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index d5744b752511..919a5ce47515 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -29,7 +29,7 @@ static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; #endif -static const ctl_table ax25_param_table[] = { +static const struct ctl_table ax25_param_table[] = { { .procname = "ip_default_mode", .maxlen = sizeof(int), diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index acbac2a9c62f..489bb36f1b94 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -32,7 +32,6 @@ batman-adv-y += icmp_socket.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o -batman-adv-y += ring_buffer.o batman-adv-y += routing.o batman-adv-y += send.o batman-adv-y += soft-interface.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f680ee101878..62da5278014a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -19,7 +19,6 @@ #include "main.h" #include "translation-table.h" -#include "ring_buffer.h" #include "originator.h" #include "routing.h" #include "gateway_common.h" @@ -30,6 +29,49 @@ #include "network-coding.h" /** + * batadv_ring_buffer_set - update the ring buffer with the given value + * @lq_recv: pointer to the ring buffer + * @lq_index: index to store the value at + * @value: value to store in the ring buffer + */ +static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, + uint8_t value) +{ + lq_recv[*lq_index] = value; + *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; +} + +/** + * batadv_ring_buffer_set - compute the average of all non-zero values stored + * in the given ring buffer + * @lq_recv: pointer to the ring buffer + * + * Returns computed average value. + */ +static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) +{ + const uint8_t *ptr; + uint16_t count = 0, i = 0, sum = 0; + + ptr = lq_recv; + + while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { + if (*ptr != 0) { + count++; + sum += *ptr; + } + + i++; + ptr++; + } + + if (count == 0) + return 0; + + return (uint8_t)(sum / count); +} + +/* * batadv_dup_status - duplicate status * @BATADV_NO_DUP: the packet is a duplicate * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the @@ -48,12 +90,11 @@ static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh, __be32 seqno) + struct batadv_orig_node *orig_neigh) { struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, - ntohl(seqno)); + neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr); if (!neigh_node) goto out; @@ -428,18 +469,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, else skb_size = packet_len; - skb_size += ETH_HLEN + NET_IP_ALIGN; + skb_size += ETH_HLEN; - forw_packet_aggr->skb = dev_alloc_skb(skb_size); + forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!forw_packet_aggr->skb) { if (!own_packet) atomic_inc(&bat_priv->batman_queue_left); kfree(forw_packet_aggr); goto out; } - skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN); - - INIT_HLIST_NODE(&forw_packet_aggr->list); + skb_reserve(forw_packet_aggr->skb, ETH_HLEN); skb_buff = skb_put(forw_packet_aggr->skb, packet_len); forw_packet_aggr->packet_len = packet_len; @@ -605,6 +644,41 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, if_incoming, 0, batadv_iv_ogm_fwd_send_time()); } +/** + * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for + * the given interface + * @hard_iface: the interface for which the windows have to be shifted + */ +static void +batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + unsigned long *word; + uint32_t i; + size_t word_index; + uint8_t *w; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + spin_lock_bh(&orig_node->ogm_cnt_lock); + word_index = hard_iface->if_num * BATADV_NUM_WORDS; + word = &(orig_node->bcast_own[word_index]); + + batadv_bit_get_packet(bat_priv, word, 1, 0); + w = &orig_node->bcast_own_sum[hard_iface->if_num]; + *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); + spin_unlock_bh(&orig_node->ogm_cnt_lock); + } + rcu_read_unlock(); + } +} + static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -649,7 +723,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS; } - batadv_slide_own_bcast_window(hard_iface); + batadv_iv_ogm_slide_own_bcast_window(hard_iface); batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, batadv_iv_ogm_emit_send_time(bat_priv)); @@ -685,7 +759,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && tmp_neigh_node->if_incoming == if_incoming && atomic_inc_not_zero(&tmp_neigh_node->refcount)) { - if (neigh_node) + if (WARN(neigh_node, "too many matching neigh_nodes")) batadv_neigh_node_free_ref(neigh_node); neigh_node = tmp_neigh_node; continue; @@ -711,8 +785,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, ethhdr->h_source, - orig_node, orig_tmp, - batadv_ogm_packet->seqno); + orig_node, orig_tmp); batadv_orig_node_free_ref(orig_tmp); if (!neigh_node) @@ -844,8 +917,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, orig_neigh_node->orig, orig_neigh_node, - orig_neigh_node, - batadv_ogm_packet->seqno); + orig_neigh_node); if (!neigh_node) goto out; @@ -1013,7 +1085,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, struct batadv_neigh_node *orig_neigh_router = NULL; int has_directlink_flag; int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_broadcast = 0, is_bidirect; + int is_bidirect; bool is_single_hop_neigh = false; bool is_from_best_next_hop = false; int sameseq, similar_ttl; @@ -1077,19 +1149,9 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (batadv_compare_eth(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; - - if (is_broadcast_ether_addr(ethhdr->h_source)) - is_broadcast = 1; } rcu_read_unlock(); - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); - return; - } - if (is_my_addr) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: received my own broadcast (sender: %pM)\n", @@ -1097,13 +1159,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, return; } - if (is_broadcast) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all packets with broadcast source addr (sender: %pM)\n", - ethhdr->h_source); - return; - } - if (is_my_orig) { unsigned long *word; int offset; @@ -1312,7 +1367,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, skb->len + ETH_HLEN); packet_len = skb_headlen(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); packet_buff = skb->data; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index de27b3175cfd..e14531f1ce1c 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -180,7 +180,7 @@ static struct batadv_bla_claim */ static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, - uint8_t *addr, short vid) + uint8_t *addr, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; @@ -257,7 +257,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, - short vid, int claimtype) + unsigned short vid, int claimtype) { struct sk_buff *skb; struct ethhdr *ethhdr; @@ -307,7 +307,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, */ memcpy(ethhdr->h_source, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid); + "bla_send_claim(): CLAIM %pM on vid %d\n", mac, + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame @@ -316,7 +317,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, - vid); + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: /* announcement frame @@ -325,7 +326,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", - ethhdr->h_source, vid); + ethhdr->h_source, BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame @@ -335,13 +336,15 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); memcpy(ethhdr->h_dest, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n", - ethhdr->h_source, ethhdr->h_dest, vid); + "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", + ethhdr->h_source, ethhdr->h_dest, + BATADV_PRINT_VID(vid)); break; } - if (vid != -1) - skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid); + if (vid & BATADV_VLAN_HAS_TAG) + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); @@ -367,7 +370,7 @@ out: */ static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, - short vid, bool own_backbone) + unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; struct batadv_orig_node *orig_node; @@ -380,7 +383,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", - orig, vid); + orig, BATADV_PRINT_VID(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -434,7 +437,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, static void batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -456,7 +459,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, */ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct hlist_head *head; struct batadv_hashtable *hash; @@ -547,7 +550,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, * @backbone_gw: the backbone gateway which claims it */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid, + const uint8_t *mac, const unsigned short vid, struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_bla_claim *claim; @@ -572,7 +575,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, atomic_set(&claim->refcount, 2); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, @@ -591,7 +594,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); @@ -611,7 +614,7 @@ claim_free_ref: * given mac address and vid. */ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid) + const uint8_t *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; @@ -622,7 +625,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, return; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); @@ -637,7 +640,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, /* check for ANNOUNCE frame, return 1 if handled */ static int batadv_handle_announce(struct batadv_priv *bat_priv, uint8_t *an_addr, uint8_t *backbone_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; uint16_t crc; @@ -658,12 +661,13 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - vid, backbone_gw->orig, crc); + BATADV_PRINT_VID(vid), backbone_gw->orig, crc); if (backbone_gw->crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", - backbone_gw->orig, backbone_gw->vid, + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), backbone_gw->crc, crc); batadv_bla_send_request(backbone_gw); @@ -685,7 +689,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, static int batadv_handle_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - struct ethhdr *ethhdr, short vid) + struct ethhdr *ethhdr, unsigned short vid) { /* check for REQUEST frame */ if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest)) @@ -699,7 +703,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", - vid, ethhdr->h_source); + BATADV_PRINT_VID(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return 1; @@ -709,7 +713,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, static int batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - uint8_t *claim_addr, short vid) + uint8_t *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -727,7 +731,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", - claim_addr, vid, backbone_gw->orig); + claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_free_ref(backbone_gw); @@ -738,7 +742,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, static int batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, uint8_t *claim_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -861,14 +865,15 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_bla_claim_dst *bla_dst; uint16_t proto; int headlen; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { vhdr = (struct vlan_ethhdr *)ethhdr; vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; proto = ntohs(vhdr->h_vlan_encapsulated_proto); headlen = sizeof(*vhdr); } else { @@ -885,7 +890,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, return 0; /* pskb_may_pull() may have modified the pointers, get ethhdr again */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen); /* Check whether the ARP frame carries a valid @@ -910,7 +915,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, + hw_dst); if (ret < 2) return ret; @@ -945,7 +951,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); return 1; } @@ -1362,7 +1368,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_bla_backbone_gw *backbone_gw; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) return 0; @@ -1379,6 +1385,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; } /* see if this originator is a backbone gw for this VLAN */ @@ -1428,15 +1435,15 @@ void batadv_bla_free(struct batadv_priv *bat_priv) * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast) +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -1523,7 +1530,8 @@ out: * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; @@ -1543,7 +1551,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) if (batadv_bla_process_claim(bat_priv, primary_if, skb)) goto handled; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ @@ -1627,8 +1635,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); - seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", - claim->addr, claim->vid, + seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", + claim->addr, BATADV_PRINT_VID(claim->vid), claim->backbone_gw->orig, (is_own ? 'x' : ' '), claim->backbone_gw->crc); @@ -1680,10 +1688,10 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) if (is_own) continue; - seq_printf(seq, - " * %pM on % 5d % 4i.%03is (%#.4x)\n", - backbone_gw->orig, backbone_gw->vid, - secs, msecs, backbone_gw->crc); + seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), secs, + msecs, backbone_gw->crc); } rcu_read_unlock(); } diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index dea2fbc5d98d..4b102e71e5bd 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -21,9 +21,10 @@ #define _NET_BATMAN_ADV_BLA_H_ #ifdef CONFIG_BATMAN_ADV_BLA -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast); -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid); +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast); +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid); int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); @@ -42,13 +43,14 @@ void batadv_bla_free(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int batadv_bla_rx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid, bool is_bcast) + struct sk_buff *skb, unsigned short vid, + bool is_bcast) { return 0; } static inline int batadv_bla_tx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid) + struct sk_buff *skb, unsigned short vid) { return 0; } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 239992021b1d..06345d401588 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -45,9 +45,9 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly + * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly * free it - * @dat_entry: the oentry to free + * @dat_entry: the entry to free */ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) { @@ -56,10 +56,10 @@ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) } /** - * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not + * batadv_dat_to_purge - check whether a dat_entry has to be purged or not * @dat_entry: the entry to check * - * Returns true if the entry has to be purged now, false otherwise + * Returns true if the entry has to be purged now, false otherwise. */ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) { @@ -75,8 +75,8 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) * returns a boolean value: true is the entry has to be deleted, * false otherwise * - * Loops over each entry in the DAT local storage and delete it if and only if - * the to_purge function passed as argument returns true + * Loops over each entry in the DAT local storage and deletes it if and only if + * the to_purge function passed as argument returns true. */ static void __batadv_dat_purge(struct batadv_priv *bat_priv, bool (*to_purge)(struct batadv_dat_entry *)) @@ -97,7 +97,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, spin_lock_bh(list_lock); hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { - /* if an helper function has been passed as parameter, + /* if a helper function has been passed as parameter, * ask it if the entry has to be purged or not */ if (to_purge && !to_purge(dat_entry)) @@ -134,7 +134,7 @@ static void batadv_dat_purge(struct work_struct *work) * @node: node in the local table * @data2: second object to compare the node to * - * Returns 1 if the two entry are the same, 0 otherwise + * Returns 1 if the two entries are the same, 0 otherwise. */ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) { @@ -149,7 +149,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_src field in the ARP packet + * Returns the value of the hw_src field in the ARP packet. */ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) { @@ -166,7 +166,7 @@ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_src field in the ARP packet + * Returns the value of the ip_src field in the ARP packet. */ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) { @@ -178,7 +178,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_dst field in the ARP packet + * Returns the value of the hw_dst field in the ARP packet. */ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) { @@ -190,7 +190,7 @@ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_dst field in the ARP packet + * Returns the value of the ip_dst field in the ARP packet. */ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) { @@ -202,7 +202,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) * @data: data to hash * @size: size of the hash table * - * Returns the selected index in the hash table for the given data + * Returns the selected index in the hash table for the given data. */ static uint32_t batadv_hash_dat(const void *data, uint32_t size) { @@ -224,12 +224,12 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size) } /** - * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash + * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash * table * @bat_priv: the bat priv with all the soft interface information * @ip: search key * - * Returns the dat_entry if found, NULL otherwise + * Returns the dat_entry if found, NULL otherwise. */ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) @@ -343,9 +343,6 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, if (hdr_size == 0) return; - /* if the ARP packet is encapsulated in a batman packet, let's print - * some debug messages - */ unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; switch (unicast_4addr_packet->u.header.packet_type) { @@ -409,7 +406,8 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, * @candidate: orig_node under evaluation * @max_orig_node: last selected candidate * - * Returns true if the node has been elected as next candidate or false othrwise + * Returns true if the node has been elected as next candidate or false + * otherwise. */ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, int select, batadv_dat_addr_t tmp_max, @@ -472,7 +470,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, */ cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND; - /* iterate over the originator list and find the node with closest + /* iterate over the originator list and find the node with the closest * dat_address which has not been selected yet */ for (i = 0; i < hash->size; i++) { @@ -480,7 +478,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - /* the dht space is a ring and addresses are unsigned */ + /* the dht space is a ring using unsigned addresses */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -512,7 +510,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, } /** - * batadv_dat_select_candidates - selects the nodes which the DHT message has to + * batadv_dat_select_candidates - select the nodes which the DHT message has to * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT @@ -521,7 +519,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * closest values (from the LEFT, with wrap around if needed) then the hash * value of the key. ip_dst is the key. * - * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM + * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) @@ -558,10 +556,11 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @ip: the DHT key * @packet_subtype: unicast4addr packet subtype to use * - * In this function the skb is copied by means of pskb_copy() and is sent as - * unicast packet to each of the selected candidates + * This function copies the skb with pskb_copy() and is sent as unicast packet + * to each of the selected candidates. * - * Returns true if the packet is sent to at least one candidate, false otherwise + * Returns true if the packet is sent to at least one candidate, false + * otherwise. */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, @@ -727,7 +726,7 @@ out: * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb * - * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise + * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. */ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -754,9 +753,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN); - /* Check whether the ARP packet carries a valid - * IP information - */ + /* check whether the ARP packet carries a valid IP information */ if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) goto out; @@ -784,7 +781,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) goto out; - /* we don't care about the destination MAC address in ARP requests */ + /* don't care about the destination MAC address in ARP requests */ if (arphdr->ar_op != htons(ARPOP_REQUEST)) { hw_dst = batadv_arp_hw_dst(skb, hdr_size); if (is_zero_ether_addr(hw_dst) || @@ -804,8 +801,8 @@ out: * @skb: packet to check * * Returns true if the message has been sent to the dht candidates, false - * otherwise. In case of true the message has to be enqueued to permit the - * fallback + * otherwise. In case of a positive return value the message has to be enqueued + * to permit the fallback. */ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -867,7 +864,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); ret = true; } else { - /* Send the request on the DHT */ + /* Send the request to the DHT */ ret = batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_GET); } @@ -884,7 +881,7 @@ out: * @skb: packet to check * @hdr_size: size of the encapsulation header * - * Returns true if the request has been answered, false otherwise + * Returns true if the request has been answered, false otherwise. */ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -924,10 +921,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; - /* to preserve backwards compatibility, here the node has to answer - * using the same packet type it received for the request. This is due - * to that if a node is not using the 4addr packet format it may not - * support it. + /* To preserve backwards compatibility, the node has choose the outgoing + * format based on the incoming request packet type. The assumption is + * that a node not using the 4addr packet format doesn't support it. */ if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, @@ -977,7 +973,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); /* Send the ARP reply to the candidates for both the IP addresses that - * the node got within the ARP reply + * the node obtained from the ARP reply */ batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); @@ -987,7 +983,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, * DAT storage only * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check - * @hdr_size: siaze of the encapsulation header + * @hdr_size: size of the encapsulation header */ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -1031,11 +1027,11 @@ out: /** * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped - * (because the node has already got the reply via DAT) or not + * (because the node has already obtained the reply via DAT) or not * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the broadcast packet * - * Returns true if the node can drop the packet, false otherwise + * Returns true if the node can drop the packet, false otherwise. */ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 522243aff2f3..c478e6bcf89b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -117,6 +117,58 @@ static int batadv_is_valid_iface(const struct net_device *net_dev) return 1; } +/** + * batadv_is_wifi_netdev - check if the given net_device struct is a wifi + * interface + * @net_device: the device to check + * + * Returns true if the net device is a 802.11 wireless device, false otherwise. + */ +static bool batadv_is_wifi_netdev(struct net_device *net_device) +{ +#ifdef CONFIG_WIRELESS_EXT + /* pre-cfg80211 drivers have to implement WEXT, so it is possible to + * check for wireless_handlers != NULL + */ + if (net_device->wireless_handlers) + return true; +#endif + + /* cfg80211 drivers have to set ieee80211_ptr */ + if (net_device->ieee80211_ptr) + return true; + + return false; +} + +/** + * batadv_is_wifi_iface - check if the given interface represented by ifindex + * is a wifi interface + * @ifindex: interface index to check + * + * Returns true if the interface represented by ifindex is a 802.11 wireless + * device, false otherwise. + */ +bool batadv_is_wifi_iface(int ifindex) +{ + struct net_device *net_device = NULL; + bool ret = false; + + if (ifindex == BATADV_NULL_IFINDEX) + goto out; + + net_device = dev_get_by_index(&init_net, ifindex); + if (!net_device) + goto out; + + ret = batadv_is_wifi_netdev(net_device); + +out: + if (net_device) + dev_put(net_device); + return ret; +} + static struct batadv_hard_iface * batadv_hardif_get_active(const struct net_device *soft_iface) { @@ -525,7 +577,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); + hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) goto release_dev; @@ -541,18 +593,16 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); + hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; + if (batadv_is_wifi_netdev(net_dev)) + hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; + /* extra reference for return */ atomic_set(&hard_iface->refcount, 2); batadv_check_known_mac_addr(hard_iface->net_dev); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); - /* This can't be called via a bat_priv callback because - * we have no bat_priv yet. - */ - atomic_set(&hard_iface->bat_iv.ogm_seqno, 1); - hard_iface->bat_iv.ogm_buff = NULL; - return hard_iface; free_if: @@ -595,7 +645,7 @@ void batadv_hardif_remove_interfaces(void) static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *net_dev = ptr; + struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; @@ -657,38 +707,6 @@ out: return NOTIFY_DONE; } -/* This function returns true if the interface represented by ifindex is a - * 802.11 wireless device - */ -bool batadv_is_wifi_iface(int ifindex) -{ - struct net_device *net_device = NULL; - bool ret = false; - - if (ifindex == BATADV_NULL_IFINDEX) - goto out; - - net_device = dev_get_by_index(&init_net, ifindex); - if (!net_device) - goto out; - -#ifdef CONFIG_WIRELESS_EXT - /* pre-cfg80211 drivers have to implement WEXT, so it is possible to - * check for wireless_handlers != NULL - */ - if (net_device->wireless_handlers) - ret = true; - else -#endif - /* cfg80211 drivers have to set ieee80211_ptr */ - if (net_device->ieee80211_ptr) - ret = true; -out: - if (net_device) - dev_put(net_device); - return ret; -} - struct notifier_block batadv_hard_if_notifier = { .notifier_call = batadv_hard_if_event, }; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 0ba6c899b2d3..b27508b8085c 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -177,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (len >= sizeof(struct batadv_icmp_packet_rr)) packet_len = sizeof(struct batadv_icmp_packet_rr); - skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN); if (!skb) { len = -ENOMEM; goto out; } - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); if (copy_from_user(icmp_packet, buff, packet_len)) { diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 59a0d6af15c8..5e9aebb7d56b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.2.0" +#define BATADV_SOURCE_VERSION "2013.3.0" #endif /* B.A.T.M.A.N. parameters */ @@ -76,6 +76,11 @@ #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ +/* number of packets to send for broadcasts on different interface types */ +#define BATADV_NUM_BCASTS_DEFAULT 1 +#define BATADV_NUM_BCASTS_WIRELESS 3 +#define BATADV_NUM_BCASTS_MAX 3 + /* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ #define ARP_REQ_DELAY 250 /* numbers of originator to contact for any PUT/GET DHT operation */ @@ -157,6 +162,17 @@ enum batadv_uev_type { #include <linux/seq_file.h> #include "types.h" +/** + * batadv_vlan_flags - flags for the four MSB of any vlan ID field + * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not + */ +enum batadv_vlan_flags { + BATADV_VLAN_HAS_TAG = BIT(15), +}; + +#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \ + (int)(vid & VLAN_VID_MASK) : -1) + extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index e84629ece9b7..a487d46e0aec 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1245,7 +1245,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, return; /* Set the mac header as if we actually sent the packet uncoded */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); @@ -1359,18 +1359,17 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, * buffer * @skb: data skb to forward * @neigh_node: next hop to forward packet to - * @ethhdr: pointer to the ethernet header inside the skb * * Returns true if the skb was consumed (encoded packet sent) or false otherwise */ bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { const struct net_device *netdev = neigh_node->if_incoming->soft_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; @@ -1423,7 +1422,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; @@ -1482,7 +1481,7 @@ out: void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb) { - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return; @@ -1533,7 +1532,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, skb_reset_network_header(skb); /* Reconstruct original mac header */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr, ðhdr_tmp, sizeof(*ethhdr)); /* Select the correct unicast header information based on the location @@ -1677,7 +1676,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, return NET_RX_DROP; coded_packet = (struct batadv_coded_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* Verify frame is destined for us */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && @@ -1763,6 +1762,13 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) /* For each orig_node in this bin */ rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + /* no need to print the orig node if it does not have + * network coding neighbors + */ + if (list_empty(&orig_node->in_coding_list) && + list_empty(&orig_node->out_coding_list)) + continue; + seq_printf(seq, "Node: %pM\n", orig_node->orig); seq_puts(seq, " Ingoing: "); diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 4fa6d0caddbd..85a4ec81ad50 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -36,8 +36,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); void batadv_nc_init_orig(struct batadv_orig_node *orig_node); bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr); + struct batadv_neigh_node *neigh_node); void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb); void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, @@ -87,8 +86,7 @@ static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } static inline bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { return false; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index fad1a2093e15..f50553a7de62 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -92,7 +92,7 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node) struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno) + const uint8_t *neigh_addr) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_neigh_node *neigh_node; @@ -110,8 +110,8 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, atomic_set(&neigh_node->refcount, 2); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM, initial seqno %d\n", - neigh_addr, seqno); + "Creating new neighbor %pM on interface %s\n", neigh_addr, + hard_iface->net_dev->name); out: return neigh_node; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 734e5a3d8a5b..7887b84a9af4 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -31,7 +31,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno); + const uint8_t *neigh_addr); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * batadv_orig_node_get_router(struct batadv_orig_node *orig_node); diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c deleted file mode 100644 index ccab0bbdbb59..000000000000 --- a/net/batman-adv/ring_buffer.c +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#include "main.h" -#include "ring_buffer.h" - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value) -{ - lq_recv[*lq_index] = value; - *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; -} - -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) -{ - const uint8_t *ptr; - uint16_t count = 0, i = 0, sum = 0; - - ptr = lq_recv; - - while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { - if (*ptr != 0) { - count++; - sum += *ptr; - } - - i++; - ptr++; - } - - if (count == 0) - return 0; - - return (uint8_t)(sum / count); -} diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h deleted file mode 100644 index 3f92ae248e83..000000000000 --- a/net/batman-adv/ring_buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_ -#define _NET_BATMAN_ADV_RING_BUFFER_H_ - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value); -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]); - -#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b27a4d792d15..2f0bd3ffe6e8 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -34,35 +34,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) -{ - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - unsigned long *word; - uint32_t i; - size_t word_index; - uint8_t *w; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - spin_lock_bh(&orig_node->ogm_cnt_lock); - word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bcast_own[word_index]); - - batadv_bit_get_packet(bat_priv, word, 1, 0); - w = &orig_node->bcast_own_sum[hard_iface->if_num]; - *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_node->ogm_cnt_lock); - } - rcu_read_unlock(); - } -} - static void _batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) @@ -256,7 +227,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, header_len))) return false; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -314,7 +285,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_ECHO_REPLY; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -362,7 +333,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_TTL_EXCEEDED; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -392,7 +363,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -439,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmp_packet->header.ttl--; /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) + if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -569,7 +540,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, if (unlikely(!pskb_may_pull(skb, hdr_size))) return -ENODATA; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -803,8 +774,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL; struct batadv_neigh_node *neigh_node = NULL; struct batadv_unicast_packet *unicast_packet; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); - int ret = NET_RX_DROP; + struct ethhdr *ethhdr = eth_hdr(skb); + int res, ret = NET_RX_DROP; struct sk_buff *new_skb; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -864,16 +835,19 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet->header.ttl--; - /* network code packet if possible */ - if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { - ret = NET_RX_SUCCESS; - } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { - ret = NET_RX_SUCCESS; + res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - /* Update stats counter */ + /* translate transmit result into receive result */ + if (res == NET_XMIT_SUCCESS) { + /* skb was transmitted and consumed */ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, skb->len + ETH_HLEN); + + ret = NET_RX_SUCCESS; + } else if (res == NET_XMIT_POLICED) { + /* skb was buffered and consumed */ + ret = NET_RX_SUCCESS; } out: @@ -1165,7 +1139,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -1265,7 +1239,7 @@ int batadv_recv_vis_packet(struct sk_buff *skb, return NET_RX_DROP; vis_packet = (struct batadv_vis_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* not for me */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 99eeafaba407..72a29bde2010 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -20,7 +20,6 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ #define _NET_BATMAN_ADV_ROUTING_H_ -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface); bool batadv_check_management_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, int header_len); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 263cfd1ccee7..e9ff8d801201 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -61,7 +61,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); @@ -96,26 +96,37 @@ send_skb_err: * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Returns TRUE on success; FALSE otherwise. + * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or + * NET_XMIT_POLICED if the skb is buffered for later transmit. */ -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if) +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; + int ret = NET_XMIT_DROP; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); if (!neigh_node) - return false; + return ret; - /* route it */ - batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + /* try to network code the packet, if it is received on an interface + * (i.e. being forwarded). If the packet originates from this node or if + * network coding fails, then send the packet as usual. + */ + if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { + ret = NET_XMIT_POLICED; + } else { + batadv_send_skb_packet(skb, neigh_node->if_incoming, + neigh_node->addr); + ret = NET_XMIT_SUCCESS; + } batadv_neigh_node_free_ref(neigh_node); - return true; + return ret; } void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) @@ -152,8 +163,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time) { - INIT_HLIST_NODE(&forw_packet->list); - /* add new packet to packet list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list); @@ -260,6 +269,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (hard_iface->soft_iface != soft_iface) continue; + if (forw_packet->num_packets >= hard_iface->num_bcasts) + continue; + /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb1) @@ -271,7 +283,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) forw_packet->num_packets++; /* if we still have some more bcasts to send */ - if (forw_packet->num_packets < 3) { + if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, msecs_to_jiffies(5)); return; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 38e662f619ac..e7b17880fca4 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -23,9 +23,9 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const uint8_t *dst_addr); -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if); +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if); void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 819dfb006cdf..700d0b49742d 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -154,7 +154,7 @@ static int batadv_interface_tx(struct sk_buff *skb, 0x00, 0x00}; unsigned int header_len = 0; int data_len = skb->len, ret; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; bool do_bcast = false; uint32_t seqno; unsigned long brd_delay = 1; @@ -303,7 +303,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_header *batadv_header = (struct batadv_header *)skb->data; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; __be16 ethertype = __constant_htons(ETH_P_BATMAN); bool is_bcast; @@ -316,7 +316,7 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 9e8748575845..429aeef3d8b2 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -163,10 +163,19 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } +/** + * batadv_tt_local_event - store a local TT event (ADD/DEL) + * @bat_priv: the bat priv with all the soft interface information + * @tt_local_entry: the TT entry involved in the event + * @event_flags: flags to store in the event structure + */ static void batadv_tt_local_event(struct batadv_priv *bat_priv, - const uint8_t *addr, uint8_t flags) + struct batadv_tt_local_entry *tt_local_entry, + uint8_t event_flags) { struct batadv_tt_change_node *tt_change_node, *entry, *safe; + struct batadv_tt_common_entry *common = &tt_local_entry->common; + uint8_t flags = common->flags | event_flags; bool event_removed = false; bool del_op_requested, del_op_entry; @@ -176,7 +185,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - memcpy(tt_change_node->change.addr, addr, ETH_ALEN); + memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); del_op_requested = flags & BATADV_TT_CLIENT_DEL; @@ -184,7 +193,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tt.changes_list_lock); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { - if (!batadv_compare_eth(entry->change.addr, addr)) + if (!batadv_compare_eth(entry->change.addr, common->addr)) continue; /* DEL+ADD in the same orig interval have no effect and can be @@ -332,7 +341,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, } add_event: - batadv_tt_local_event(bat_priv, addr, tt_local->common.flags); + batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS); check_roaming: /* Check whether it is a roaming, but don't do anything if the roaming @@ -529,8 +538,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, uint16_t flags, const char *message) { - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - tt_local_entry->common.flags | flags); + batadv_tt_local_event(bat_priv, tt_local_entry, flags); /* The local client has to be marked as "pending to be removed" but has * to be kept in the table in order to send it in a full table @@ -584,8 +592,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, /* if this client has been added right now, it is possible to * immediately purge it */ - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - curr_flags | BATADV_TT_CLIENT_DEL); + batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); hlist_del_rcu(&tt_local_entry->common.hash_entry); batadv_tt_local_entry_free_ref(tt_local_entry); @@ -791,10 +798,25 @@ out: batadv_tt_orig_list_entry_free_ref(orig_entry); } -/* caller must hold orig_node refcount */ +/** + * batadv_tt_global_add - add a new TT global entry or update an existing one + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the originator announcing the client + * @tt_addr: the mac address of the non-mesh client + * @flags: TT flags that have to be set for this non-mesh client + * @ttvn: the tt version number ever announcing this non-mesh client + * + * Add a new TT global entry for the given originator. If the entry already + * exists add a new reference to the given originator (a global entry can have + * references to multiple originators) and adjust the flags attribute to reflect + * the function argument. + * If a TT local entry exists for this non-mesh client remove it. + * + * The caller must hold orig_node refcount. + */ int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *tt_addr, uint8_t flags, + const unsigned char *tt_addr, uint16_t flags, uint8_t ttvn) { struct batadv_tt_global_entry *tt_global_entry; @@ -1600,11 +1622,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = tt_query_size + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); tt_response->ttvn = ttvn; @@ -1665,11 +1687,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_req_len = sizeof(*tt_request); tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); @@ -1691,7 +1713,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); - if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: @@ -1715,7 +1737,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; uint8_t orig_ttvn, req_ttvn, ttvn; - int ret = false; + int res, ret = false; unsigned char *tt_buff; bool full_table; uint16_t tt_len, tt_tot; @@ -1762,11 +1784,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1810,8 +1832,10 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL); + if (res != NET_XMIT_DROP) ret = true; + goto out; unlock: @@ -1878,11 +1902,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1925,7 +1949,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = true; goto out; @@ -2212,11 +2236,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; - skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); @@ -2238,7 +2262,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index ab8e683b402f..659a3bb759ce 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -33,7 +33,7 @@ void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, const unsigned char *tt_buff, int tt_buff_len); int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *addr, uint8_t flags, + const unsigned char *addr, uint16_t flags, uint8_t ttvn); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index aba8364c3689..b2c94e139319 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -61,6 +61,7 @@ struct batadv_hard_iface_bat_iv { * @if_status: status of the interface for batman-adv * @net_dev: pointer to the net_device * @frag_seqno: last fragment sequence number sent by this interface + * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) * @hardif_obj: kobject of the per interface sysfs "mesh" directory * @refcount: number of contexts the object is used * @batman_adv_ptype: packet type describing packets that should be processed by @@ -76,6 +77,7 @@ struct batadv_hard_iface { char if_status; struct net_device *net_dev; atomic_t frag_seqno; + uint8_t num_bcasts; struct kobject *hardif_obj; atomic_t refcount; struct packet_type batman_adv_ptype; @@ -640,7 +642,7 @@ struct batadv_socket_packet { #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bla_backbone_gw { uint8_t orig[ETH_ALEN]; - short vid; + unsigned short vid; struct hlist_node hash_entry; struct batadv_priv *bat_priv; unsigned long lasttime; @@ -663,7 +665,7 @@ struct batadv_bla_backbone_gw { */ struct batadv_bla_claim { uint8_t addr[ETH_ALEN]; - short vid; + unsigned short vid; struct batadv_bla_backbone_gw *backbone_gw; unsigned long lasttime; struct hlist_node hash_entry; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 0bb3b5982f94..dc8b5d4dd636 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -464,7 +464,7 @@ find_router: goto out; } - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 1625e5793a89..4983340f1943 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -392,12 +392,12 @@ batadv_add_packet(struct batadv_priv *bat_priv, return NULL; len = sizeof(*packet) + vis_info_len; - info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!info->skb_packet) { kfree(info); return NULL; } - skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(info->skb_packet, ETH_HLEN); packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); kref_init(&info->refcount); @@ -697,7 +697,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; struct sk_buff *skb; - uint32_t i; + uint32_t i, res; packet = (struct batadv_vis_packet *)info->skb_packet->data; @@ -724,7 +724,8 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, if (!skb) continue; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res == NET_XMIT_DROP) kfree_skb(skb); } rcu_read_unlock(); @@ -748,7 +749,7 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv, if (!skb) goto out; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) kfree_skb(skb); out: @@ -854,13 +855,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv) if (!bat_priv->vis.my_info) goto err; - len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE; - len += ETH_HLEN + NET_IP_ALIGN; - bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); + len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; + bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL, + len); if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 967312803e41..2ef66781fedb 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -22,6 +22,9 @@ #include <asm/uaccess.h> #include "br_private.h" +#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -55,10 +58,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); if (is_broadcast_ether_addr(dest)) - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); goto out; } if (br_multicast_rcv(br, NULL, skb)) { @@ -70,11 +73,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, true); out: rcu_read_unlock(); @@ -346,12 +349,10 @@ void br_dev_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX; - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX; + dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | + NETIF_F_HW_VLAN_CTAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; + dev->vlan_features = COMMON_FEATURES; br->dev = dev; spin_lock_init(&br->lock); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 092b20e4ee4c..4b81b1471789 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -174,7 +174,8 @@ out: static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb0, void (*__packet_hook)(const struct net_bridge_port *p, - struct sk_buff *skb)) + struct sk_buff *skb), + bool unicast) { struct net_bridge_port *p; struct net_bridge_port *prev; @@ -182,6 +183,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { + /* Do not flood unicast traffic to ports that turn it off */ + if (unicast && !(p->flags & BR_FLOOD)) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; @@ -203,16 +207,16 @@ out: /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast) { - br_flood(br, skb, NULL, __br_deliver); + br_flood(br, skb, NULL, __br_deliver, unicast); } /* called under bridge lock */ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2) + struct sk_buff *skb2, bool unicast) { - br_flood(br, skb, skb2, __br_forward); + br_flood(br, skb, skb2, __br_forward, unicast); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4cdba60926ff..5623be6b9ecd 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = 0; + p->flags = BR_LEARNING | BR_FLOOD; br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 828e2bcc1f52..1b8b8b824cd7 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -65,6 +65,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + bool unicast = true; u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) @@ -75,7 +76,8 @@ int br_handle_frame_finish(struct sk_buff *skb) /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -94,9 +96,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) + if (is_broadcast_ether_addr(dest)) { skb2 = skb; - else if (is_multicast_ether_addr(dest)) { + unicast = false; + } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || @@ -109,6 +112,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } else skb2 = skb; + unicast = false; br->dev->stats.multicast++; } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) { @@ -122,7 +126,7 @@ int br_handle_frame_finish(struct sk_buff *skb) dst->used = jiffies; br_forward(dst->dst, skb, skb2); } else - br_flood_forward(br, skb, skb2); + br_flood_forward(br, skb, skb2, unicast); } if (skb2) @@ -142,7 +146,8 @@ static int br_handle_local_finish(struct sk_buff *skb) u16 vid = 0; br_vlan_get_tag(skb, &vid); - br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d6448e35e027..31952a103949 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -23,6 +23,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/timer.h> +#include <linux/inetdevice.h> #include <net/ip.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -381,7 +382,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; - iph->saddr = 0; + iph->saddr = br->multicast_query_use_ifaddr ? + inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; @@ -616,8 +618,6 @@ rehash: mp->br = br; mp->addr = *group; - setup_timer(&mp->timer, br_multicast_group_expired, - (unsigned long)mp); hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -655,7 +655,6 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - unsigned long now = jiffies; int err; spin_lock(&br->multicast_lock); @@ -670,7 +669,6 @@ static int br_multicast_add_group(struct net_bridge *br, if (!port) { mp->mglist = true; - mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -678,7 +676,7 @@ static int br_multicast_add_group(struct net_bridge *br, (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->port == port) - goto found; + goto out; if ((unsigned long)p->port < (unsigned long)port) break; } @@ -689,8 +687,6 @@ static int br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); -found: - mod_timer(&p->timer, now + br->multicast_membership_interval); out: err = 0; @@ -1130,6 +1126,10 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!mp) goto out; + setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); + mod_timer(&mp->timer, now + br->multicast_membership_interval); + mp->timer_armed = true; + max_delay *= br->multicast_last_member_count; if (mp->mglist && @@ -1204,6 +1204,10 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!mp) goto out; + setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); + mod_timer(&mp->timer, now + br->multicast_membership_interval); + mp->timer_armed = true; + max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? @@ -1247,6 +1251,32 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (br->multicast_querier && + !timer_pending(&br->multicast_querier_timer)) { + __br_multicast_send_query(br, port, &mp->addr); + + time = jiffies + br->multicast_last_member_count * + br->multicast_last_member_interval; + mod_timer(port ? &port->multicast_query_timer : + &br->multicast_query_timer, time); + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; + } + } + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; @@ -1262,7 +1292,7 @@ static void br_multicast_leave_group(struct net_bridge *br, call_rcu_bh(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1274,30 +1304,12 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && + if (mp->mglist && mp->timer_armed && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); } - - goto out; - } - - for (p = mlock_dereference(mp->ports, br); - p != NULL; - p = mlock_dereference(p->next, br)) { - if (p->port != port) - continue; - - if (!hlist_unhashed(&p->mglist) && - (timer_pending(&p->timer) ? - time_after(p->timer.expires, time) : - try_to_del_timer_sync(&p->timer) >= 0)) { - mod_timer(&p->timer, time); - } - - break; } out: @@ -1619,6 +1631,7 @@ void br_multicast_init(struct net_bridge *br) br->multicast_router = 1; br->multicast_querier = 0; + br->multicast_query_use_ifaddr = 0; br->multicast_last_member_count = 2; br->multicast_startup_query_count = 2; @@ -1672,6 +1685,7 @@ void br_multicast_stop(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); + mp->timer_armed = false; call_rcu_bh(&mp->rcu, br_multicast_free_group); } } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1ed75bfd8d1d..f87736270eaa 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -992,7 +992,7 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static -int brnf_sysctl_call_tables(ctl_table * ctl, int write, +int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, void __user * buffer, size_t * lenp, loff_t * ppos) { int ret; @@ -1004,7 +1004,7 @@ int brnf_sysctl_call_tables(ctl_table * ctl, int write, return ret; } -static ctl_table brnf_table[] = { +static struct ctl_table brnf_table[] = { { .procname = "bridge-nf-call-arptables", .data = &brnf_call_arptables, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8e3abf564798..1fc30abd3a52 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -30,6 +30,8 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + 0; } @@ -56,7 +58,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || - nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD))) return -EMSGSIZE; return 0; @@ -281,6 +285,8 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -328,6 +334,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 1644b3e1f947..3a3f371b2841 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -31,7 +31,7 @@ struct notifier_block br_device_notifier = { */ static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; bool changed_addr; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d2c043a857b6..3be89b3ce17b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -112,6 +112,7 @@ struct net_bridge_mdb_entry struct timer_list timer; struct br_ip addr; bool mglist; + bool timer_armed; }; struct net_bridge_mdb_htable @@ -157,6 +158,8 @@ struct net_bridge_port #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 #define BR_ADMIN_COST 0x00000010 +#define BR_LEARNING 0x00000020 +#define BR_FLOOD 0x00000040 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; @@ -249,6 +252,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; + u8 multicast_query_use_ifaddr:1; u32 hash_elasticity; u32 hash_max; @@ -411,9 +415,10 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, + bool unicast); extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2); + struct sk_buff *skb2, bool unicast); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8baa9c08e1a4..394bb96b6087 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -375,6 +375,31 @@ static ssize_t store_multicast_snooping(struct device *d, static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, show_multicast_snooping, store_multicast_snooping); +static ssize_t show_multicast_query_use_ifaddr(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); +} + +static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_use_ifaddr = !!val; + return 0; +} + +static ssize_t +store_multicast_query_use_ifaddr(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_use_ifaddr); +} +static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR, + show_multicast_query_use_ifaddr, + store_multicast_query_use_ifaddr); + static ssize_t show_multicast_querier(struct device *d, struct device_attribute *attr, char *buf) @@ -734,6 +759,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_router.attr, &dev_attr_multicast_snooping.attr, &dev_attr_multicast_querier.attr, + &dev_attr_multicast_query_use_ifaddr.attr, &dev_attr_hash_elasticity.attr, &dev_attr_hash_max.attr, &dev_attr_multicast_last_member_count.attr, diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index a1ef1b6e14dc..2a2cdb756d51 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -158,6 +158,8 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); +BRPORT_ATTR_FLAG(learning, BR_LEARNING); +BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -195,6 +197,8 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_hairpin_mode, &brport_attr_bpdu_guard, &brport_attr_root_block, + &brport_attr_learning, + &brport_attr_unicast_flood, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index df0364aa12d5..518093802d1d 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -271,6 +271,12 @@ static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) { struct ebt_ulog_info *uloginfo = par->targinfo; + if (!par->net->xt.ebt_ulog_warn_deprecated) { + pr_info("ebt_ulog is deprecated and it will be removed soon, " + "use ebt_nflog instead\n"); + par->net->xt.ebt_ulog_warn_deprecated = true; + } + if (uloginfo->nlgroup > 31) return -EINVAL; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3d110c4fc787..ac7802428384 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1339,7 +1339,7 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, /* ebtables expects 32 bytes long names but xt_match names are 29 bytes long. Copy 29 bytes and fill remaining bytes with zeroes. */ - strncpy(name, m->u.match->name, sizeof(name)); + strlcpy(name, m->u.match->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; @@ -1351,7 +1351,7 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, char __user *hlp = ubase + ((char *)w - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; - strncpy(name, w->u.watcher->name, sizeof(name)); + strlcpy(name, w->u.watcher->name, sizeof(name)); if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; @@ -1377,7 +1377,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); if (ret != 0) return ret; - strncpy(name, t->u.target->name, sizeof(name)); + strlcpy(name, t->u.target->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1f9ece1a9c34..4dca159435cf 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -352,9 +352,9 @@ EXPORT_SYMBOL(caif_enroll_dev); /* notify Caif of device events */ static int caif_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_device_entry *caifd = NULL; struct caif_dev_common *caifdev; struct cfcnfg *cfg; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 942e00a425fd..75ed04b78fa4 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -121,9 +121,9 @@ static struct packet_type caif_usb_type __read_mostly = { }; static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_dev_common common; struct cflayer *layer, *link_support; struct usbnet *usbnet; diff --git a/net/can/af_can.c b/net/can/af_can.c index c4e50852c9f4..3ab8dd2e1282 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -794,9 +794,9 @@ EXPORT_SYMBOL(can_proto_unregister); * af_can notifier to create/remove CAN netdevice specific structs */ static int can_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dev_rcv_lists *d; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8f113e6ff327..46f20bfafc0e 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1350,9 +1350,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, * notification handler for netdevice status changes */ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier); struct sock *sk = &bo->sk; struct bcm_op *op; diff --git a/net/can/gw.c b/net/can/gw.c index 3ee690e8c7d3..2f291f961a17 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -445,9 +445,9 @@ static inline void cgw_unregister_filter(struct cgw_job *gwj) } static int cgw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/can/raw.c b/net/can/raw.c index 1085e65f848e..641e1c895123 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -239,9 +239,9 @@ static int raw_enable_allfilters(struct net_device *dev, struct sock *sk) } static int raw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; diff --git a/net/core/datagram.c b/net/core/datagram.c index b71423db7785..9cbaba98ce4c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -56,6 +56,7 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <trace/events/skb.h> +#include <net/ll_poll.h> /* * Is a socket 'connection oriented' ? @@ -207,6 +208,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); + if (sk_valid_ll(sk) && sk_poll_ll(sk, flags & MSG_DONTWAIT)) + continue; + /* User doesn't want to wait */ error = -EAGAIN; if (!timeo) diff --git a/net/core/dev.c b/net/core/dev.c index fc1e289397f5..fa007dba6beb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,7 @@ #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> #include <linux/static_key.h> +#include <linux/hashtable.h> #include "net-sysfs.h" @@ -166,6 +167,12 @@ static struct list_head offload_base __read_mostly; DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +/* protects napi_hash addition/deletion and napi_gen_id */ +static DEFINE_SPINLOCK(napi_hash_lock); + +static unsigned int napi_gen_id; +static DEFINE_HASHTABLE(napi_hash, 8); + seqcount_t devnet_rename_seq; static inline void dev_base_seq_inc(struct net *net) @@ -1198,9 +1205,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1309,9 +1314,7 @@ static int __dev_close(struct net_device *dev) LIST_HEAD(single); /* Temporarily disable netpoll until the interface is down */ - retval = netpoll_rx_disable(dev); - if (retval) - return retval; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); @@ -1353,14 +1356,11 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); /* Block netpoll rx while the interface is going down */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); dev_close_many(&single); @@ -1368,7 +1368,7 @@ int dev_close(struct net_device *dev) netpoll_rx_enable(dev); } - return ret; + return 0; } EXPORT_SYMBOL(dev_close); @@ -1398,6 +1398,14 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, + struct net_device *dev) +{ + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return nb->notifier_call(nb, val, &info); +} static int dev_boot_phase = 1; @@ -1430,7 +1438,7 @@ int register_netdevice_notifier(struct notifier_block *nb) goto unlock; for_each_net(net) { for_each_netdev(net, dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); err = notifier_to_errno(err); if (err) goto rollback; @@ -1438,7 +1446,7 @@ int register_netdevice_notifier(struct notifier_block *nb) if (!(dev->flags & IFF_UP)) continue; - nb->notifier_call(nb, NETDEV_UP, dev); + call_netdevice_notifier(nb, NETDEV_UP, dev); } } @@ -1454,10 +1462,11 @@ rollback: goto outroll; if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } @@ -1495,10 +1504,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb) for_each_net(net) { for_each_netdev(net, dev) { if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } unlock: @@ -1508,6 +1518,25 @@ unlock: EXPORT_SYMBOL(unregister_netdevice_notifier); /** + * call_netdevice_notifiers_info - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @info: notifier information data + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ + +int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, + struct netdev_notifier_info *info) +{ + ASSERT_RTNL(); + netdev_notifier_info_init(info, dev); + return raw_notifier_call_chain(&netdev_chain, val, info); +} +EXPORT_SYMBOL(call_netdevice_notifiers_info); + +/** * call_netdevice_notifiers - call all network notifier blocks * @val: value passed unmodified to notifier function * @dev: net_device pointer passed unmodified to notifier function @@ -1518,8 +1547,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - ASSERT_RTNL(); - return raw_notifier_call_chain(&netdev_chain, val, dev); + struct netdev_notifier_info info; + + return call_netdevice_notifiers_info(val, dev, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -1629,7 +1659,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) return NET_RX_DROP; } skb->skb_iif = 0; - skb->dev = dev; skb_dst_drop(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; @@ -1702,7 +1731,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb2); if (skb_network_header(skb2) < skb2->data || - skb2->network_header > skb2->tail) { + skb_network_header(skb2) > skb_tail_pointer(skb2)) { net_crit_ratelimited("protocol %04x is buggy, dev %s\n", ntohs(skb2->protocol), dev->name); @@ -3065,6 +3094,46 @@ static int rps_ipi_queued(struct softnet_data *sd) return 0; } +#ifdef CONFIG_NET_FLOW_LIMIT +int netdev_flow_limit_table_len __read_mostly = (1 << 12); +#endif + +static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) +{ +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit *fl; + struct softnet_data *sd; + unsigned int old_flow, new_flow; + + if (qlen < (netdev_max_backlog >> 1)) + return false; + + sd = &__get_cpu_var(softnet_data); + + rcu_read_lock(); + fl = rcu_dereference(sd->flow_limit); + if (fl) { + new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + old_flow = fl->history[fl->history_head]; + fl->history[fl->history_head] = new_flow; + + fl->history_head++; + fl->history_head &= FLOW_LIMIT_HISTORY - 1; + + if (likely(fl->buckets[old_flow])) + fl->buckets[old_flow]--; + + if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { + fl->count++; + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); +#endif + return false; +} + /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog * queue (may be a remote CPU queue). @@ -3074,13 +3143,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, { struct softnet_data *sd; unsigned long flags; + unsigned int qlen; sd = &per_cpu(softnet_data, cpu); local_irq_save(flags); rps_lock(sd); - if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { + qlen = skb_queue_len(&sd->input_pkt_queue); + if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (skb_queue_len(&sd->input_pkt_queue)) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); @@ -3828,7 +3899,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - if (skb->mac_header == skb->tail && + if (skb_mac_header(skb) == skb_tail_pointer(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); @@ -4072,6 +4143,58 @@ void napi_complete(struct napi_struct *n) } EXPORT_SYMBOL(napi_complete); +/* must be called under rcu_read_lock(), as we dont take a reference */ +struct napi_struct *napi_by_id(unsigned int napi_id) +{ + unsigned int hash = napi_id % HASH_SIZE(napi_hash); + struct napi_struct *napi; + + hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) + if (napi->napi_id == napi_id) + return napi; + + return NULL; +} +EXPORT_SYMBOL_GPL(napi_by_id); + +void napi_hash_add(struct napi_struct *napi) +{ + if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { + + spin_lock(&napi_hash_lock); + + /* 0 is not a valid id, we also skip an id that is taken + * we expect both events to be extremely rare + */ + napi->napi_id = 0; + while (!napi->napi_id) { + napi->napi_id = ++napi_gen_id; + if (napi_by_id(napi->napi_id)) + napi->napi_id = 0; + } + + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + + spin_unlock(&napi_hash_lock); + } +} +EXPORT_SYMBOL_GPL(napi_hash_add); + +/* Warning : caller is responsible to make sure rcu grace period + * is respected before freeing memory containing @napi + */ +void napi_hash_del(struct napi_struct *napi) +{ + spin_lock(&napi_hash_lock); + + if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + hlist_del_rcu(&napi->napi_hash_node); + + spin_unlock(&napi_hash_lock); +} +EXPORT_SYMBOL_GPL(napi_hash_del); + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -4370,7 +4493,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, else list_add_tail_rcu(&upper->list, &dev->upper_dev_list); dev_hold(upper_dev); - + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); return 0; } @@ -4430,6 +4553,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, list_del_rcu(&upper->list); dev_put(upper_dev); kfree_rcu(upper, rcu); + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -4700,8 +4824,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) } if (dev->flags & IFF_UP && - (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); + (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = changes; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); + } } /** @@ -5235,6 +5364,10 @@ int register_netdevice(struct net_device *dev) */ dev->hw_enc_features |= NETIF_F_SG; + /* Make NETIF_F_SG inheritable to MPLS. + */ + dev->mpls_features |= NETIF_F_SG; + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) @@ -6014,7 +6147,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all, } EXPORT_SYMBOL(netdev_increment_features); -static struct hlist_head *netdev_create_hash(void) +static struct hlist_head * __net_init netdev_create_hash(void) { int i; struct hlist_head *hash; @@ -6270,6 +6403,10 @@ static int __init net_dev_init(void) sd->backlog.weight = weight_p; sd->backlog.gro_list = NULL; sd->backlog.gro_count = 0; + +#ifdef CONFIG_NET_FLOW_LIMIT + sd->flow_limit = NULL; +#endif } dev_boot_phase = 0; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d23b6682f4e9..5e78d44333b9 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb, } static int dropmon_net_event(struct notifier_block *ev_block, - unsigned long event, void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *tmp; diff --git a/net/core/dst.c b/net/core/dst.c index df9cc810ec8e..ca4231ec7347 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dst_entry *dst, *last = NULL; switch (event) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index ce91766eeca9..9255bbdf81ff 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", @@ -1413,7 +1414,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } -/* The main entry point in this file. Called from net/core/dev.c */ +/* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d5a9f8ead0d8..21735440c44a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -705,9 +705,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) static int fib_rules_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct fib_rules_ops *ops; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index d9d198aa9fed..6b5b6e7013ca 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -82,7 +82,7 @@ struct gen_estimator { struct list_head list; struct gnet_stats_basic_packed *bstats; - struct gnet_stats_rate_est *rate_est; + struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; u64 last_bytes; @@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est) static struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { struct rb_node *p = est_root.rb_node; @@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator); * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est) + struct gnet_stats_rate_est64 *rate_est) { struct gen_estimator *e; @@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * Returns 0 on success or a negative error code. */ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { gen_kill_estimator(bstats, rate_est); @@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * Returns true if estimator is active, and false if not. */ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { bool res; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index ddedf211e588..9d3d9e78397b 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); int gnet_stats_copy_rate_est(struct gnet_dump *d, const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est *r) + struct gnet_stats_rate_est64 *r) { + struct gnet_stats_rate_est est; + int res; + if (b && !gen_estimator_active(b, r)) return 0; + est.bps = min_t(u64, UINT_MAX, r->bps); + /* we have some time before reaching 2^32 packets per second */ + est.pps = r->pps; + if (d->compat_tc_stats) { - d->tc_stats.bps = r->bps; - d->tc_stats.pps = r->pps; + d->tc_stats.bps = est.bps; + d->tc_stats.pps = est.pps; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r)); + if (d->tail) { + res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est)); + if (res < 0 || est.bps == r->bps) + return res; + /* emit 64bit stats only if needed */ + return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r)); + } return 0; } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8f82a5cc3851..9c3a839322ba 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -92,6 +92,9 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (dev->ifindex != dev->iflink) return true; + if (dev->priv_flags & IFF_TEAM_PORT) + return true; + return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5c56b217b999..decaa4b9db2f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2765,11 +2765,11 @@ EXPORT_SYMBOL(neigh_app_ns); static int zero; static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); -static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_unres_qlen(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { int size, ret; - ctl_table tmp = *ctl; + struct ctl_table tmp = *ctl; tmp.extra1 = &zero; tmp.extra2 = &unres_qlen_max; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 569d355fec3e..2bf83299600a 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; + unsigned int flow_limit_count = 0; - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit *fl; + + rcu_read_lock(); + fl = rcu_dereference(sd->flow_limit); + if (fl) + flow_limit_count = fl->count; + rcu_read_unlock(); +#endif + + seq_printf(seq, + "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); + sd->cpu_collision, sd->received_rps, flow_limit_count); return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cec074be8c43..03c8ec3edc72 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -247,7 +247,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } -int netpoll_rx_disable(struct net_device *dev) +void netpoll_rx_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -257,7 +257,6 @@ int netpoll_rx_disable(struct net_device *dev) if (ni) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); - return 0; } EXPORT_SYMBOL(netpoll_rx_disable); @@ -690,25 +689,20 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo send_skb->dev = skb->dev; skb_reset_network_header(send_skb); - skb_put(send_skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(send_skb); - + hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = 255; hdr->saddr = *saddr; hdr->daddr = *daddr; - send_skb->transport_header = send_skb->tail; - skb_put(send_skb, size); - - icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; icmp6h->icmp6_router = 0; icmp6h->icmp6_solicited = 1; - target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); + + target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); *target = msg->target; icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, IPPROTO_ICMPV6, diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 0777d0aa18c3..e533259dce3c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -261,7 +261,7 @@ struct cgroup_subsys net_prio_subsys = { static int netprio_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netprio_map *old; /* diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 11f2704c3810..9640972ec50e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1921,7 +1921,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); if (pn->pktgen_exiting) @@ -2627,6 +2627,29 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->tv_usec = htonl(timestamp.tv_usec); } +static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, + struct pktgen_dev *pkt_dev, + unsigned int extralen) +{ + struct sk_buff *skb = NULL; + unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + + pkt_dev->pkt_overhead; + + if (pkt_dev->flags & F_NODE) { + int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id(); + + skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node); + if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + } + } else { + skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); + } + + return skb; +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2657,32 +2680,13 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; - if (pkt_dev->flags & F_NODE) { - int node; - - if (pkt_dev->node >= 0) - node = pkt_dev->node; - else - node = numa_node_id(); - - skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); - skb->dev = odev; - } - } - else - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); - + skb = pktgen_alloc_skb(odev, pkt_dev, datalen); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } - prefetchw(skb->data); + prefetchw(skb->data); skb_reserve(skb, datalen); /* Reserve for ethernet and IP header */ @@ -2708,15 +2712,15 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IP); } - skb->network_header = skb->tail; - skb->transport_header = skb->network_header + sizeof(struct iphdr); - skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ip_hdr(skb); - udph = udp_hdr(skb); - memcpy(eth, pkt_dev->hh, 12); *(__be16 *) & eth[12] = protocol; @@ -2746,8 +2750,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = 0; iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; pktgen_finalize_skb(pkt_dev, skb, datalen); @@ -2788,15 +2790,13 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT); + skb = pktgen_alloc_skb(odev, pkt_dev, 16); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } - prefetchw(skb->data); + prefetchw(skb->data); skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ @@ -2822,13 +2822,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - skb->network_header = skb->tail; - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ipv6_hdr(skb); - udph = udp_hdr(skb); memcpy(eth, pkt_dev->hh, 12); *(__be16 *) ð[12] = protocol; @@ -2863,8 +2864,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, iph->daddr = pkt_dev->cur_in6_daddr; iph->saddr = pkt_dev->cur_in6_saddr; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a08bd2b7fe3f..9007533867f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -947,6 +947,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_vlan vf_vlan; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; + struct ifla_vf_link_state vf_linkstate; /* * Not all SR-IOV capable drivers support the @@ -956,18 +957,24 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); + /* The default value for VF link state is "auto" + * IFLA_VF_LINK_STATE_AUTO which equals zero + */ + ivi.linkstate = 0; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = - vf_spoofchk.vf = ivi.vf; + vf_spoofchk.vf = + vf_linkstate.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_tx_rate.rate = ivi.tx_rate; vf_spoofchk.setting = ivi.spoofchk; + vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -978,7 +985,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), - &vf_spoofchk)) + &vf_spoofchk) || + nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), + &vf_linkstate)) goto nla_put_failure; nla_nest_end(skb, vf); } @@ -1238,6 +1247,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivs->setting); break; } + case IFLA_VF_LINK_STATE: { + struct ifla_vf_link_state *ivl; + ivl = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + break; + } default: err = -EINVAL; break; @@ -2667,7 +2685,7 @@ static void rtnetlink_rcv(struct sk_buff *skb) static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_UP: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cfd777bd6bd0..edf37578e21e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -199,9 +199,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) skb->truesize = sizeof(struct sk_buff); atomic_set(&skb->users, 1); -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; out: return skb; } @@ -275,10 +273,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -344,10 +340,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -739,6 +733,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->vlan_tci = old->vlan_tci; skb_copy_secmark(new, old); + +#ifdef CONFIG_NET_LL_RX_POLL + new->napi_id = old->napi_id; +#endif } /* @@ -911,18 +909,8 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off) static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { -#ifndef NET_SKBUFF_DATA_USES_OFFSET - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; -#endif - __copy_skb_header(new, old); -#ifndef NET_SKBUFF_DATA_USES_OFFSET - skb_headers_offset_update(new, offset); -#endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; @@ -1114,7 +1102,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->end = skb->head + size; #endif skb->tail += off; - skb_headers_offset_update(skb, off); + skb_headers_offset_update(skb, nhead); /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1209,9 +1197,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, off = newheadroom - oldheadroom; if (n->ip_summed == CHECKSUM_PARTIAL) n->csum_start += off; -#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb_headers_offset_update(n, off); -#endif return n; } @@ -2853,7 +2840,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) - continue; + goto perform_csum_check; if (!sg) { nskb->ip_summed = CHECKSUM_NONE; @@ -2917,6 +2904,7 @@ skip_fraglist: nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; +perform_csum_check: if (!csum) { nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); diff --git a/net/core/sock.c b/net/core/sock.c index 88868a9d21da..1e744b12fda3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,6 +139,8 @@ #include <net/tcp.h> #endif +#include <net/ll_poll.h> + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -911,6 +913,19 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; +#ifdef CONFIG_NET_LL_RX_POLL + case SO_LL: + /* allow unprivileged users to decrease the value */ + if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) + ret = -EPERM; + else { + if (val < 0) + ret = -EINVAL; + else + sk->sk_ll_usec = val; + } + break; +#endif default: ret = -ENOPROTOOPT; break; @@ -1168,6 +1183,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; +#ifdef CONFIG_NET_LL_RX_POLL + case SO_LL: + v.val = sk->sk_ll_usec; + break; +#endif + default: return -ENOPROTOOPT; } @@ -2284,6 +2305,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); +#ifdef CONFIG_NET_LL_RX_POLL + sk->sk_napi_id = 0; + sk->sk_ll_usec = sysctl_net_ll_poll; +#endif + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cfdb46ab3a7f..62702c2053de 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -19,16 +19,17 @@ #include <net/ip.h> #include <net/sock.h> #include <net/net_ratelimit.h> +#include <net/ll_poll.h> static int one = 1; #ifdef CONFIG_RPS -static int rps_sock_flow_sysctl(ctl_table *table, int write, +static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned int orig_size, size; int ret, i; - ctl_table tmp = { + struct ctl_table tmp = { .data = &size, .maxlen = sizeof(size), .mode = table->mode @@ -87,6 +88,109 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, } #endif /* CONFIG_RPS */ +#ifdef CONFIG_NET_FLOW_LIMIT +static DEFINE_MUTEX(flow_limit_update_mutex); + +static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct sd_flow_limit *cur; + struct softnet_data *sd; + cpumask_var_t mask; + int i, len, ret = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + if (write) { + ret = cpumask_parse_user(buffer, *lenp, mask); + if (ret) + goto done; + + mutex_lock(&flow_limit_update_mutex); + len = sizeof(*cur) + netdev_flow_limit_table_len; + for_each_possible_cpu(i) { + sd = &per_cpu(softnet_data, i); + cur = rcu_dereference_protected(sd->flow_limit, + lockdep_is_held(&flow_limit_update_mutex)); + if (cur && !cpumask_test_cpu(i, mask)) { + RCU_INIT_POINTER(sd->flow_limit, NULL); + synchronize_rcu(); + kfree(cur); + } else if (!cur && cpumask_test_cpu(i, mask)) { + cur = kzalloc(len, GFP_KERNEL); + if (!cur) { + /* not unwinding previous changes */ + ret = -ENOMEM; + goto write_unlock; + } + cur->num_buckets = netdev_flow_limit_table_len; + rcu_assign_pointer(sd->flow_limit, cur); + } + } +write_unlock: + mutex_unlock(&flow_limit_update_mutex); + } else { + char kbuf[128]; + + if (*ppos || !*lenp) { + *lenp = 0; + goto done; + } + + cpumask_clear(mask); + rcu_read_lock(); + for_each_possible_cpu(i) { + sd = &per_cpu(softnet_data, i); + if (rcu_dereference(sd->flow_limit)) + cpumask_set_cpu(i, mask); + } + rcu_read_unlock(); + + len = min(sizeof(kbuf) - 1, *lenp); + len = cpumask_scnprintf(kbuf, len, mask); + if (!len) { + *lenp = 0; + goto done; + } + if (len < *lenp) + kbuf[len++] = '\n'; + if (copy_to_user(buffer, kbuf, len)) { + ret = -EFAULT; + goto done; + } + *lenp = len; + *ppos += len; + } + +done: + free_cpumask_var(mask); + return ret; +} + +static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + unsigned int old, *ptr; + int ret; + + mutex_lock(&flow_limit_update_mutex); + + ptr = table->data; + old = *ptr; + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (!ret && write && !is_power_of_2(*ptr)) { + *ptr = old; + ret = -EINVAL; + } + + mutex_unlock(&flow_limit_update_mutex); + return ret; +} +#endif /* CONFIG_NET_FLOW_LIMIT */ + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -180,6 +284,29 @@ static struct ctl_table net_core_table[] = { .proc_handler = rps_sock_flow_sysctl }, #endif +#ifdef CONFIG_NET_FLOW_LIMIT + { + .procname = "flow_limit_cpu_bitmap", + .mode = 0644, + .proc_handler = flow_limit_cpu_sysctl + }, + { + .procname = "flow_limit_table_len", + .data = &netdev_flow_limit_table_len, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = flow_limit_table_len_sysctl + }, +#endif /* CONFIG_NET_FLOW_LIMIT */ +#ifdef CONFIG_NET_LL_RX_POLL + { + .procname = "low_latency_poll", + .data = &sysctl_net_ll_poll, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif #endif /* CONFIG_NET */ { .procname = "netdev_budget", diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index c21f200eed93..dd4d506ef923 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2078,9 +2078,9 @@ out_err: } static int dn_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 7d9197063ebb..dd0dfb25f4b1 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -158,11 +158,11 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU static int min_priority[1]; static int max_priority[] = { 127 }; /* From DECnet spec */ -static int dn_forwarding_proc(ctl_table *, int, +static int dn_forwarding_proc(struct ctl_table *, int, void __user *, size_t *, loff_t *); static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table dn_dev_vars[5]; + struct ctl_table dn_dev_vars[5]; } dn_dev_sysctl = { NULL, { @@ -242,7 +242,7 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } } -static int dn_forwarding_proc(ctl_table *table, int write, +static int dn_forwarding_proc(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index a55eeccaa72f..5325b541c526 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) return 0; } -static int dn_node_address_handler(ctl_table *table, int write, +static int dn_node_address_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -183,7 +183,7 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } -static int dn_def_dev_handler(ctl_table *table, int write, +static int dn_def_dev_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -246,7 +246,7 @@ static int dn_def_dev_handler(ctl_table *table, int write, return 0; } -static ctl_table dn_table[] = { +static struct ctl_table dn_table[] = { { .procname = "node_address", .maxlen = 7, diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 55e1fd5b3e56..3b9d5f20bd1c 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1352,10 +1352,9 @@ static inline void lowpan_netlink_fini(void) } static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); LIST_HEAD(del_list); struct lowpan_dev_record *entry, *tmp; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8603ca827104..37cf1a6ea3ad 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -9,10 +9,7 @@ config IP_MULTICAST intend to participate in the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. More information about the MBONE is on the WWW at - <http://www.savetz.com/mbone/>. Information about the multicast - capabilities of the various network cards is contained in - <file:Documentation/networking/multicast.txt>. For most people, it's - safe to say N. + <http://www.savetz.com/mbone/>. For most people, it's safe to say N. config IP_ADVANCED_ROUTER bool "IP: advanced router" @@ -223,10 +220,8 @@ config IP_MROUTE packets that have several destination addresses. It is needed on the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. In order to do that, you would most - likely run the program mrouted. Information about the multicast - capabilities of the various network cards is contained in - <file:Documentation/networking/multicast.txt>. If you haven't heard - about it, you don't need it. + likely run the program mrouted. If you haven't heard about it, you + don't need it. config IP_MROUTE_MULTIPLE_TABLES bool "IP: multicast policy routing" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 089cb9f36387..7fcf8101d85f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,8 +8,8 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - datagram.o raw.o udp.o udplite.o \ - arp.o icmp.o devinet.o af_inet.o igmp.o \ + tcp_offload.o datagram.o raw.o udp.o udplite.o \ + udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d01be2a3ae53..b4d0be2b7ce9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1295,6 +1295,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | 0))) goto out; @@ -1384,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out_unlock; id = ntohl(*(__be32 *)&iph->id); - flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1406,6 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | + (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; @@ -1557,15 +1559,6 @@ static const struct net_protocol tcp_protocol = { .netns_ok = 1, }; -static const struct net_offload tcp_offload = { - .callbacks = { - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - }, -}; - static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, @@ -1573,13 +1566,6 @@ static const struct net_protocol udp_protocol = { .netns_ok = 1, }; -static const struct net_offload udp_offload = { - .callbacks = { - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, - }, -}; - static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, @@ -1679,10 +1665,10 @@ static int __init ipv4_offload_init(void) /* * Add offloads */ - if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + if (udpv4_offload_init() < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); - if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) - pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); + if (tcpv4_offload_init() < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); return 0; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2e7f1948216f..717902669d2f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 247ec1951c35..4429b013f269 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1234,13 +1234,19 @@ out: static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); rt_cache_flush(dev_net(dev)); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&arp_tbl, dev); + break; default: break; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b7..8d48c392adcc 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev) WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); + kfree(rcu_dereference_protected(idev->mc_hash, 1)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); #endif @@ -1333,7 +1334,7 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1941,7 +1942,7 @@ static void inet_forward_change(struct net *net) } } -static int devinet_conf_proc(ctl_table *ctl, int write, +static int devinet_conf_proc(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -1984,7 +1985,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_sysctl_forward(ctl_table *ctl, int write, +static int devinet_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2027,7 +2028,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, return ret; } -static int ipv4_doint_and_flush(ctl_table *ctl, int write, +static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4cfe34d4cc96..ab3d814bc80a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7629a209f9d..05a4888dede9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1038,7 +1038,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; struct net *net = dev_net(dev); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 76e10b47e053..5f7d11a45871 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -482,7 +482,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; - struct icmp_bxm icmp_param; + struct icmp_bxm *icmp_param; struct rtable *rt = skb_rtable(skb_in); struct ipcm_cookie ipc; struct flowi4 fl4; @@ -503,7 +503,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph = ip_hdr(skb_in); if ((u8 *)iph < skb_in->head || - (skb_in->network_header + sizeof(*iph)) > skb_in->tail) + (skb_network_header(skb_in) + sizeof(*iph)) > + skb_tail_pointer(skb_in)) goto out; /* @@ -557,9 +558,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } + icmp_param = kmalloc(sizeof(*icmp_param), GFP_ATOMIC); + if (!icmp_param) + return; + sk = icmp_xmit_lock(net); if (sk == NULL) - return; + goto out_free; /* * Construct source address and options. @@ -585,7 +590,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) + if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -593,19 +598,19 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) * Prepare data for ICMP header. */ - icmp_param.data.icmph.type = type; - icmp_param.data.icmph.code = code; - icmp_param.data.icmph.un.gateway = info; - icmp_param.data.icmph.checksum = 0; - icmp_param.skb = skb_in; - icmp_param.offset = skb_network_offset(skb_in); + icmp_param->data.icmph.type = type; + icmp_param->data.icmph.code = code; + icmp_param->data.icmph.un.gateway = info; + icmp_param->data.icmph.checksum = 0; + icmp_param->skb = skb_in; + icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; - ipc.opt = &icmp_param.replyopts.opt; + ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, - type, code, &icmp_param); + type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -617,19 +622,21 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) room = dst_mtu(&rt->dst); if (room > 576) room = 576; - room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; + room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); - icmp_param.data_len = skb_in->len - icmp_param.offset; - if (icmp_param.data_len > room) - icmp_param.data_len = room; - icmp_param.head_len = sizeof(struct icmphdr); + icmp_param->data_len = skb_in->len - icmp_param->offset; + if (icmp_param->data_len > room) + icmp_param->data_len = room; + icmp_param->head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); + icmp_push_reply(icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); +out_free: + kfree(icmp_param); out:; } EXPORT_SYMBOL(icmp_send); @@ -657,7 +664,8 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * ICMP_PARAMETERPROB. */ static void icmp_unreach(struct sk_buff *skb) @@ -939,7 +947,8 @@ error: void icmp_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int offset = iph->ihl<<2; + struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); @@ -949,7 +958,7 @@ void icmp_err(struct sk_buff *skb, u32 info) * triggered by ICMP_ECHOREPLY which sent from kernel. */ if (icmph->type != ICMP_ECHOREPLY) { - ping_err(skb, info); + ping_err(skb, offset, info); return; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d8c232794bcb..cd71190d2962 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -363,7 +363,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) static int igmpv3_sendpack(struct sk_buff *skb) { struct igmphdr *pig = igmp_hdr(skb); - const int igmplen = skb->tail - skb->transport_header; + const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb); pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); @@ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im) * Multicast list managers */ +static u32 ip_mc_hash(const struct ip_mc_list *im) +{ + return hash_32((__force u32)im->multiaddr, MC_HASH_SZ_LOG); +} + +static void ip_mc_hash_add(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash; + u32 hash; + + mc_hash = rtnl_dereference(in_dev->mc_hash); + if (mc_hash) { + hash = ip_mc_hash(im); + im->next_hash = mc_hash[hash]; + rcu_assign_pointer(mc_hash[hash], im); + return; + } + + /* do not use a hash table for small number of items */ + if (in_dev->mc_count < 4) + return; + + mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, + GFP_KERNEL); + if (!mc_hash) + return; + + for_each_pmc_rtnl(in_dev, im) { + hash = ip_mc_hash(im); + im->next_hash = mc_hash[hash]; + RCU_INIT_POINTER(mc_hash[hash], im); + } + + rcu_assign_pointer(in_dev->mc_hash, mc_hash); +} + +static void ip_mc_hash_remove(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash); + struct ip_mc_list *aux; + + if (!mc_hash) + return; + mc_hash += ip_mc_hash(im); + while ((aux = rtnl_dereference(*mc_hash)) != im) + mc_hash = &aux->next_hash; + *mc_hash = im->next_hash; +} + /* * A socket has joined a multicast group on device dev. @@ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) in_dev->mc_count++; rcu_assign_pointer(in_dev->mc_list, im); + ip_mc_hash_add(in_dev, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { + ip_mc_hash_remove(in_dev, i); *ip = i->next_rcu; in_dev->mc_count--; igmp_group_dropped(i); @@ -1381,13 +1435,9 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST - in_dev->mr_gq_running = 0; setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, (unsigned long)in_dev); - in_dev->mr_ifc_count = 0; - in_dev->mc_count = 0; setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; @@ -2321,12 +2371,25 @@ void ip_mc_drop_socket(struct sock *sk) int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; + struct ip_mc_list __rcu **mc_hash; struct ip_sf_list *psf; int rv = 0; - for_each_pmc_rcu(in_dev, im) { - if (im->multiaddr == mc_addr) - break; + mc_hash = rcu_dereference(in_dev->mc_hash); + if (mc_hash) { + u32 hash = hash_32((__force u32)mc_addr, MC_HASH_SZ_LOG); + + for (im = rcu_dereference(mc_hash[hash]); + im != NULL; + im = rcu_dereference(im->next_hash)) { + if (im->multiaddr == mc_addr) + break; + } + } else { + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == mc_addr) + break; + } } if (im && proto == IPPROTO_IGMP) { rv = 1; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a83591492dd..a982657d05e7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -429,7 +429,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, return; } - ip_tunnel_xmit(skb, dev, tnl_params); + ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } static netdev_tx_t ipgre_xmit(struct sk_buff *skb, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 7fa8f08fa7ae..e189db409b0e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -487,7 +487,7 @@ drop: EXPORT_SYMBOL_GPL(ip_tunnel_rcv); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const struct iphdr *tnl_params) + const struct iphdr *tnl_params, const u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; @@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = df; - iph->protocol = tnl_params->protocol; + iph->protocol = protocol; iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 59cb8c769056..826be4cb482a 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 77bfcce64fe5..9df7ecd393f2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -222,7 +222,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->encapsulation = 1; } - ip_tunnel_xmit(skb, dev, tiph); + ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); return NETDEV_TX_OK; tx_error: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d9610ae7855..132a09664704 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -980,7 +980,7 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Copy the IP header */ - skb->network_header = skb->tail; + skb_set_network_header(skb, skb->len); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, ihl); ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ @@ -1609,7 +1609,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr_table *mrt; struct vif_device *v; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e7916c193932..4e9028017428 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -111,7 +111,7 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ULOG - tristate "ULOG target support" + tristate "ULOG target support (obsolete)" default m if NETFILTER_ADVANCED=n ---help--- diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 5d5d4d1be9c2..30e4de940567 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -108,7 +108,7 @@ static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { @@ -129,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this, void *ptr) { struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - return masq_device_event(this, event, dev); + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_dev_notifier = { diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index ff4b781b1056..57c671152c42 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -330,6 +330,12 @@ static int ulog_tg_check(const struct xt_tgchk_param *par) { const struct ipt_ulog_info *loginfo = par->targinfo; + if (!par->net->xt.ulog_warn_deprecated) { + pr_info("ULOG is deprecated and it will be removed soon, " + "use NFLOG instead\n"); + par->net->xt.ulog_warn_deprecated = true; + } + if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("prefix not null-terminated\n"); return -EINVAL; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 567d84168bd2..0a2e0e3e95ba 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -223,7 +223,7 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -static ctl_table ip_ct_sysctl_table[] = { +static struct ctl_table ip_ct_sysctl_table[] = { { .procname = "ip_conntrack_max", .maxlen = sizeof(int), diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7d93d62cd5fd..746427c9e719 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -33,7 +33,6 @@ #include <linux/netdevice.h> #include <net/snmp.h> #include <net/ip.h> -#include <net/ipv6.h> #include <net/icmp.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -46,8 +45,18 @@ #include <net/inet_common.h> #include <net/checksum.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <linux/in6.h> +#include <linux/icmpv6.h> +#include <net/addrconf.h> +#include <net/ipv6.h> +#include <net/transp_v6.h> +#endif -static struct ping_table ping_table; + +struct ping_table ping_table; +struct pingv6_ops pingv6_ops; +EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; @@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma pr_debug("hash(%d) = %d\n", num, res); return res; } +EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned int num) @@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } -static int ping_v4_get_port(struct sock *sk, unsigned short ident) +int ping_get_port(struct sock *sk, unsigned short ident) { struct hlist_nulls_node *node; struct hlist_nulls_head *hlist; @@ -103,6 +113,10 @@ next_port: ping_portaddr_for_each_entry(sk2, node, hlist) { isk2 = inet_sk(sk2); + /* BUG? Why is this reuse and not reuseaddr? ping.c + * doesn't turn off SO_REUSEADDR, and it doesn't expect + * that other ping processes can steal its packets. + */ if ((isk2->inet_num == ident) && (sk2 != sk) && (!sk2->sk_reuse || !sk->sk_reuse)) @@ -125,17 +139,18 @@ fail: write_unlock_bh(&ping_table.lock); return 1; } +EXPORT_SYMBOL_GPL(ping_get_port); -static void ping_v4_hash(struct sock *sk) +void ping_hash(struct sock *sk) { - pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); + pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ } -static void ping_v4_unhash(struct sock *sk) +void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); - pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); @@ -146,31 +161,61 @@ static void ping_v4_unhash(struct sock *sk) write_unlock_bh(&ping_table.lock); } } +EXPORT_SYMBOL_GPL(ping_unhash); -static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, - u16 ident, int dif) +static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; + int dif = skb->dev->ifindex; + + if (skb->protocol == htons(ETH_P_IP)) { + pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", + (int)ident, &ip_hdr(skb)->daddr, dif); +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", + (int)ident, &ipv6_hdr(skb)->daddr, dif); +#endif + } - pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", - (int)ident, &daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); - pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk, - (int)isk->inet_num, &isk->inet_rcv_saddr, - sk->sk_bound_dev_if); - pr_debug("iterate\n"); if (isk->inet_num != ident) continue; - if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr) - continue; + + if (skb->protocol == htons(ETH_P_IP) && + sk->sk_family == AF_INET) { + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, + (int) isk->inet_num, &isk->inet_rcv_saddr, + sk->sk_bound_dev_if); + + if (isk->inet_rcv_saddr && + isk->inet_rcv_saddr != ip_hdr(skb)->daddr) + continue; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6) && + sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, + (int) isk->inet_num, + &inet6_sk(sk)->rcv_saddr, + sk->sk_bound_dev_if); + + if (!ipv6_addr_any(&np->rcv_saddr) && + !ipv6_addr_equal(&np->rcv_saddr, + &ipv6_hdr(skb)->daddr)) + continue; +#endif + } + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; @@ -200,7 +245,7 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, } -static int ping_init_sock(struct sock *sk) +int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); @@ -225,8 +270,9 @@ static int ping_init_sock(struct sock *sk) return -EACCES; } +EXPORT_SYMBOL_GPL(ping_init_sock); -static void ping_close(struct sock *sk, long timeout) +void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); @@ -234,36 +280,122 @@ static void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } +EXPORT_SYMBOL_GPL(ping_close); + +/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ +static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { + struct net *net = sock_net(sk); + if (sk->sk_family == AF_INET) { + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + int chk_addr_ret; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", + sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); + + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) + chk_addr_ret = RTN_LOCAL; + + if ((sysctl_ip_nonlocal_bind == 0 && + isk->freebind == 0 && isk->transparent == 0 && + chk_addr_ret != RTN_LOCAL) || + chk_addr_ret == RTN_MULTICAST || + chk_addr_ret == RTN_BROADCAST) + return -EADDRNOTAVAIL; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; + int addr_type, scoped, has_addr; + struct net_device *dev = NULL; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", + sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); + + addr_type = ipv6_addr_type(&addr->sin6_addr); + scoped = __ipv6_addr_needs_scope_id(addr_type); + if ((addr_type != IPV6_ADDR_ANY && + !(addr_type & IPV6_ADDR_UNICAST)) || + (scoped && !addr->sin6_scope_id)) + return -EINVAL; + + rcu_read_lock(); + if (addr->sin6_scope_id) { + dev = dev_get_by_index_rcu(net, addr->sin6_scope_id); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } + has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, + scoped); + rcu_read_unlock(); + + if (!(isk->freebind || isk->transparent || has_addr || + addr_type == IPV6_ADDR_ANY)) + return -EADDRNOTAVAIL; + + if (scoped) + sk->sk_bound_dev_if = addr->sin6_scope_id; +#endif + } else { + return -EAFNOSUPPORT; + } + return 0; +} + +static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +{ + if (saddr->sa_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) saddr; + isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (saddr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; + struct ipv6_pinfo *np = inet6_sk(sk); + np->rcv_saddr = np->saddr = addr->sin6_addr; +#endif + } +} +static void ping_clear_saddr(struct sock *sk, int dif) +{ + sk->sk_bound_dev_if = dif; + if (sk->sk_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + isk->inet_rcv_saddr = isk->inet_saddr = 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&np->saddr, 0, sizeof(np->saddr)); +#endif + } +} /* * We need our own bind because there are no privileged id's == local ports. * Moreover, we don't allow binding to multi- and broadcast addresses. */ -static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *isk = inet_sk(sk); unsigned short snum; - int chk_addr_ret; int err; + int dif = sk->sk_bound_dev_if; - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n", - sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); - - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) - chk_addr_ret = RTN_LOCAL; - - if ((sysctl_ip_nonlocal_bind == 0 && - isk->freebind == 0 && isk->transparent == 0 && - chk_addr_ret != RTN_LOCAL) || - chk_addr_ret == RTN_MULTICAST || - chk_addr_ret == RTN_BROADCAST) - return -EADDRNOTAVAIL; + err = ping_check_bind_addr(sk, isk, uaddr, addr_len); + if (err) + return err; lock_sock(sk); @@ -272,42 +404,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; err = -EADDRINUSE; - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; - snum = ntohs(addr->sin_port); - if (ping_v4_get_port(sk, snum) != 0) { - isk->inet_saddr = isk->inet_rcv_saddr = 0; + ping_set_saddr(sk, uaddr); + snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port); + if (ping_get_port(sk, snum) != 0) { + ping_clear_saddr(sk, dif); goto out; } - pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n", + pr_debug("after bind(): num = %d, dif = %d\n", (int)isk->inet_num, - &isk->inet_rcv_saddr, (int)sk->sk_bound_dev_if); err = 0; - if (isk->inet_rcv_saddr) + if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || + (sk->sk_family == AF_INET6 && + !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; isk->inet_sport = htons(isk->inet_num); isk->inet_daddr = 0; isk->inet_dport = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); +#endif + sk_dst_reset(sk); out: release_sock(sk); pr_debug("ping_v4_bind -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_bind); /* * Is this a supported type of ICMP message? */ -static inline int ping_supported(int type, int code) +static inline int ping_supported(int family, int type, int code) { - if (type == ICMP_ECHO && code == 0) - return 1; - return 0; + return (family == AF_INET && type == ICMP_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); } /* @@ -315,30 +455,42 @@ static inline int ping_supported(int type, int code) * sort of error condition. */ -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); - -void ping_err(struct sk_buff *skb, u32 info) +void ping_err(struct sk_buff *skb, int offset, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int family; + struct icmphdr *icmph; struct inet_sock *inet_sock; - int type = icmp_hdr(skb)->type; - int code = icmp_hdr(skb)->code; + int type; + int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; int err; + if (skb->protocol == htons(ETH_P_IP)) { + family = AF_INET; + type = icmp_hdr(skb)->type; + code = icmp_hdr(skb)->code; + icmph = (struct icmphdr *)(skb->data + offset); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + family = AF_INET6; + type = icmp6_hdr(skb)->icmp6_type; + code = icmp6_hdr(skb)->icmp6_code; + icmph = (struct icmphdr *) (skb->data + offset); + } else { + BUG(); + } + /* We assume the packet has already been checked by icmp_unreach */ - if (!ping_supported(icmph->type, icmph->code)) + if (!ping_supported(family, icmph->type, icmph->code)) return; - pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type, - code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n", + skb->protocol, type, code, ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); - sk = ping_v4_lookup(net, iph->daddr, iph->saddr, - ntohs(icmph->un.echo.id), skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk == NULL) { pr_debug("no socket, dropping\n"); return; /* No socket for error */ @@ -349,72 +501,83 @@ void ping_err(struct sk_buff *skb, u32 info) harderr = 0; inet_sock = inet_sk(sk); - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - /* This is not a real error but ping wants to see it. - * Report it with some fake errno. */ - err = EREMOTEIO; - break; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - ipv4_sk_update_pmtu(skb, sk, info); - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; + if (skb->protocol == htons(ETH_P_IP)) { + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + /* This is not a real error but ping wants to see it. + * Report it with some fake errno. + */ + err = EREMOTEIO; + break; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); + if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + case ICMP_REDIRECT: + /* See ICMP_SOURCE_QUENCH */ + ipv4_sk_redirect(skb, sk); + err = EREMOTEIO; + break; } - break; - case ICMP_REDIRECT: - /* See ICMP_SOURCE_QUENCH */ - ipv4_sk_redirect(skb, sk); - err = EREMOTEIO; - break; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + harderr = pingv6_ops.icmpv6_err_convert(type, code, &err); +#endif } /* * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ - if (!inet_sock->recverr) { + if ((family == AF_INET && !inet_sock->recverr) || + (family == AF_INET6 && !inet6_sk(sk)->recverr)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { - ip_icmp_error(sk, skb, err, 0 /* no remote port */, - info, (u8 *)icmph); + if (family == AF_INET) { + ip_icmp_error(sk, skb, err, 0 /* no remote port */, + info, (u8 *)icmph); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + pingv6_ops.ipv6_icmp_error(sk, skb, err, 0, + info, (u8 *)icmph); +#endif + } } sk->sk_err = err; sk->sk_error_report(sk); out: sock_put(sk); } +EXPORT_SYMBOL_GPL(ping_err); /* - * Copy and checksum an ICMP Echo packet from user space into a buffer. + * Copy and checksum an ICMP Echo packet from user space into a buffer + * starting from the payload. */ -struct pingfakehdr { - struct icmphdr icmph; - struct iovec *iov; - __wsum wcheck; -}; - -static int ping_getfrag(void *from, char *to, - int offset, int fraglen, int odd, struct sk_buff *skb) +int ping_getfrag(void *from, char *to, + int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = (struct pingfakehdr *)from; @@ -425,20 +588,33 @@ static int ping_getfrag(void *from, char *to, pfh->iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; + } else if (offset < sizeof(struct icmphdr)) { + BUG(); + } else { + if (csum_partial_copy_fromiovecend + (to, pfh->iov, offset - sizeof(struct icmphdr), + fraglen, &pfh->wcheck)) + return -EFAULT; + } - return 0; +#if IS_ENABLED(CONFIG_IPV6) + /* For IPv6, checksum each skb as we go along, as expected by + * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in + * wcheck, it will be finalized in ping_v4_push_pending_frames. + */ + if (pfh->family == AF_INET6) { + skb->csum = pfh->wcheck; + skb->ip_summed = CHECKSUM_NONE; + pfh->wcheck = 0; } - if (offset < sizeof(struct icmphdr)) - BUG(); - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) - return -EFAULT; +#endif + return 0; } +EXPORT_SYMBOL_GPL(ping_getfrag); -static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, - struct flowi4 *fl4) +static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, + struct flowi4 *fl4) { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); @@ -450,24 +626,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, return ip_push_pending_frames(sk, fl4); } -static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct net *net = sock_net(sk); - struct flowi4 fl4; - struct inet_sock *inet = inet_sk(sk); - struct ipcm_cookie ipc; - struct icmphdr user_icmph; - struct pingfakehdr pfh; - struct rtable *rt = NULL; - struct ip_options_data opt_copy; - int free = 0; - __be32 saddr, daddr, faddr; - u8 tos; - int err; - - pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); - +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len) { + u8 type, code; if (len > 0xFFFF) return -EMSGSIZE; @@ -482,15 +643,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* * Fetch the ICMP header provided by the userland. - * iovec is modified! + * iovec is modified! The ICMP header is consumed. */ - - if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov, - sizeof(struct icmphdr))) + if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len)) return -EFAULT; - if (!ping_supported(user_icmph.type, user_icmph.code)) + + if (family == AF_INET) { + type = ((struct icmphdr *) user_icmph)->type; + code = ((struct icmphdr *) user_icmph)->code; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + type = ((struct icmp6hdr *) user_icmph)->icmp6_type; + code = ((struct icmp6hdr *) user_icmph)->icmp6_code; +#endif + } else { + BUG(); + } + + if (!ping_supported(family, type, code)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(ping_common_sendmsg); + +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct net *net = sock_net(sk); + struct flowi4 fl4; + struct inet_sock *inet = inet_sk(sk); + struct ipcm_cookie ipc; + struct icmphdr user_icmph; + struct pingfakehdr pfh; + struct rtable *rt = NULL; + struct ip_options_data opt_copy; + int free = 0; + __be32 saddr, daddr, faddr; + u8 tos; + int err; + + pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + /* * Get and verify the address. */ @@ -595,13 +794,14 @@ back_from_confirm: pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; pfh.iov = msg->msg_iov; pfh.wcheck = 0; + pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else - err = ping_push_pending_frames(sk, &pfh, &fl4); + err = ping_v4_push_pending_frames(sk, &pfh, &fl4); release_sock(sk); out: @@ -622,11 +822,13 @@ do_confirm: goto out; } -static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + int family = sk->sk_family; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct sk_buff *skb; int copied, err; @@ -636,11 +838,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); + if (addr_len) { + if (family == AF_INET) + *addr_len = sizeof(*sin); + else if (family == AF_INET6 && addr_len) + *addr_len = sizeof(*sin6); + } - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { + return ip_recv_error(sk, msg, len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + return pingv6_ops.ipv6_recv_error(sk, msg, len); +#endif + } + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -659,15 +872,40 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); - /* Copy the address. */ - if (sin) { + /* Copy the address and add cmsg data. */ + if (family == AF_INET) { + sin = (struct sockaddr_in *) msg->msg_name; sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); + +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *ip6 = ipv6_hdr(skb); + sin6 = (struct sockaddr_in6 *) msg->msg_name; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; + + sin6->sin6_flowinfo = 0; + if (np->sndflow) + sin6->sin6_flowinfo = ip6_flowinfo(ip6); + + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); +#endif + } else { + BUG(); } - if (isk->cmsg_flags) - ip_cmsg_recv(msg, skb); + err = copied; done: @@ -676,8 +914,9 @@ out: pr_debug("ping_recvmsg -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_recvmsg); -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); @@ -688,6 +927,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); /* @@ -698,10 +938,7 @@ void ping_rcv(struct sk_buff *skb) { struct sock *sk; struct net *net = dev_net(skb->dev); - struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph = icmp_hdr(skb); - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; /* We assume the packet has already been checked by icmp_rcv */ @@ -711,8 +948,7 @@ void ping_rcv(struct sk_buff *skb) /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); - sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), - skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { pr_debug("rcv on socket %p\n", sk); ping_queue_rcv_skb(sk, skb_get(skb)); @@ -723,6 +959,7 @@ void ping_rcv(struct sk_buff *skb) /* We're called from icmp_rcv(). kfree_skb() is done there. */ } +EXPORT_SYMBOL_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -733,14 +970,14 @@ struct proto ping_prot = { .disconnect = udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .sendmsg = ping_sendmsg, + .sendmsg = ping_v4_sendmsg, .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, - .hash = ping_v4_hash, - .unhash = ping_v4_unhash, - .get_port = ping_v4_get_port, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); @@ -764,7 +1001,8 @@ static struct sock *ping_get_first(struct seq_file *seq, int start) continue; sk_nulls_for_each(sk, node, hslot) { - if (net_eq(sock_net(sk), net)) + if (net_eq(sock_net(sk), net) && + sk->sk_family == state->family) goto found; } } @@ -797,17 +1035,24 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : sk; } -static void *ping_seq_start(struct seq_file *seq, loff_t *pos) +void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) { struct ping_iter_state *state = seq->private; state->bucket = 0; + state->family = family; read_lock_bh(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } +EXPORT_SYMBOL_GPL(ping_seq_start); + +static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET); +} -static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) +void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; @@ -819,13 +1064,15 @@ static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } +EXPORT_SYMBOL_GPL(ping_seq_next); -static void ping_seq_stop(struct seq_file *seq, void *v) +void ping_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&ping_table.lock); } +EXPORT_SYMBOL_GPL(ping_seq_stop); -static void ping_format_sock(struct sock *sp, struct seq_file *f, +static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) { struct inet_sock *inet = inet_sk(sp); @@ -846,7 +1093,7 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, atomic_read(&sp->sk_drops), len); } -static int ping_seq_show(struct seq_file *seq, void *v) +static int ping_v4_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", @@ -857,72 +1104,86 @@ static int ping_seq_show(struct seq_file *seq, void *v) struct ping_iter_state *state = seq->private; int len; - ping_format_sock(v, seq, state->bucket, &len); + ping_v4_format_sock(v, seq, state->bucket, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } return 0; } -static const struct seq_operations ping_seq_ops = { - .show = ping_seq_show, - .start = ping_seq_start, +static const struct seq_operations ping_v4_seq_ops = { + .show = ping_v4_seq_show, + .start = ping_v4_seq_start, .next = ping_seq_next, .stop = ping_seq_stop, }; static int ping_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ping_seq_ops, + struct ping_seq_afinfo *afinfo = PDE_DATA(inode); + return seq_open_net(inode, file, &afinfo->seq_ops, sizeof(struct ping_iter_state)); } -static const struct file_operations ping_seq_fops = { +const struct file_operations ping_seq_fops = { .open = ping_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_net, }; +EXPORT_SYMBOL_GPL(ping_seq_fops); + +static struct ping_seq_afinfo ping_v4_seq_afinfo = { + .name = "icmp", + .family = AF_INET, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v4_seq_start, + .show = ping_v4_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; -static int ping_proc_register(struct net *net) +int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) { struct proc_dir_entry *p; - int rc = 0; - - p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops); + p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, + afinfo->seq_fops, afinfo); if (!p) - rc = -ENOMEM; - return rc; + return -ENOMEM; + return 0; } +EXPORT_SYMBOL_GPL(ping_proc_register); -static void ping_proc_unregister(struct net *net) +void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo) { - remove_proc_entry("icmp", net->proc_net); + remove_proc_entry(afinfo->name, net->proc_net); } +EXPORT_SYMBOL_GPL(ping_proc_unregister); - -static int __net_init ping_proc_init_net(struct net *net) +static int __net_init ping_v4_proc_init_net(struct net *net) { - return ping_proc_register(net); + return ping_proc_register(net, &ping_v4_seq_afinfo); } -static void __net_exit ping_proc_exit_net(struct net *net) +static void __net_exit ping_v4_proc_exit_net(struct net *net) { - ping_proc_unregister(net); + ping_proc_unregister(net, &ping_v4_seq_afinfo); } -static struct pernet_operations ping_net_ops = { - .init = ping_proc_init_net, - .exit = ping_proc_exit_net, +static struct pernet_operations ping_v4_net_ops = { + .init = ping_v4_proc_init_net, + .exit = ping_v4_proc_exit_net, }; int __init ping_proc_init(void) { - return register_pernet_subsys(&ping_net_ops); + return register_pernet_subsys(&ping_v4_net_ops); } void ping_proc_exit(void) { - unregister_pernet_subsys(&ping_net_ops); + unregister_pernet_subsys(&ping_v4_net_ops); } #endif diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2a5bf86d2415..6577a1149a47 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), + SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d35bbf0cf404..f3fa42eac461 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -594,11 +594,25 @@ static inline u32 fnhe_hashfun(__be32 daddr) return hval & (FNHE_HASH_SIZE - 1); } +static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) +{ + rt->rt_pmtu = fnhe->fnhe_pmtu; + rt->dst.expires = fnhe->fnhe_expires; + + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; + rt->rt_uses_gateway = 1; + } +} + static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, u32 pmtu, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; + struct rtable *rt; + unsigned int i; int depth; u32 hval = fnhe_hashfun(daddr); @@ -627,8 +641,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_gw = gw; if (pmtu) { fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = expires; + fnhe->fnhe_expires = max(1UL, expires); } + /* Update all cached dsts too */ + rt = rcu_dereference(fnhe->fnhe_rth); + if (rt) + fill_route_from_fnhe(rt, fnhe); } else { if (depth > FNHE_RECLAIM_DEPTH) fnhe = fnhe_oldest(hash); @@ -640,10 +658,23 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } + fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_expires = expires; + + /* Exception created; mark the cached routes for the nexthop + * stale, so anyone caching it rechecks if this exception + * applies to them. + */ + for_each_possible_cpu(i) { + struct rtable __rcu **prt; + prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + rt = rcu_dereference(*prt); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + } } fnhe->fnhe_stamp = jiffies; @@ -922,12 +953,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); - } + if (rt->rt_pmtu == mtu && + time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) + return; rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { @@ -1068,11 +1096,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. * - * When a PMTU/redirect information update invalidates a - * route, this is indicated by setting obsolete to - * DST_OBSOLETE_KILL. + * When a PMTU/redirect information update invalidates a route, + * this is indicated by setting obsolete to DST_OBSOLETE_KILL or + * DST_OBSOLETE_DEAD by dst_free(). */ - if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) + if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; return dst; } @@ -1214,26 +1242,17 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { + int genid = fnhe_genid(dev_net(rt->dst.dev)); struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); - if (orig && rt_is_expired(orig)) { + + if (fnhe->fnhe_genid != genid) { + fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; } - if (fnhe->fnhe_pmtu) { - unsigned long expires = fnhe->fnhe_expires; - unsigned long diff = expires - jiffies; - - if (time_before(jiffies, expires)) { - rt->rt_pmtu = fnhe->fnhe_pmtu; - dst_set_expires(&rt->dst, diff); - } - } - if (fnhe->fnhe_gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = fnhe->fnhe_gw; - rt->rt_uses_gateway = 1; - } else if (!rt->rt_gateway) + fill_route_from_fnhe(rt, fnhe); + if (!rt->rt_gateway) rt->rt_gateway = daddr; rcu_assign_pointer(fnhe->fnhe_rth, rt); @@ -2429,19 +2448,22 @@ static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; -static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, +static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = (struct net *)__ctl->extra1; + if (write) { - rt_cache_flush((struct net *)__ctl->extra1); + rt_cache_flush(net); + fnhe_genid_bump(net); return 0; } return -EINVAL; } -static ctl_table ipv4_route_table[] = { +static struct ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, @@ -2609,6 +2631,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->rt_genid, 0); + atomic_set(&net->fnhe_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fa2f63fc453b..b2c123c44d69 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -49,13 +49,13 @@ static void set_local_port_range(int range[2]) } /* Validate changes from /proc interface. */ -static int ipv4_local_port_range(ctl_table *table, int write, +static int ipv4_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2]; - ctl_table tmp = { + struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, @@ -100,7 +100,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig } /* Validate changes from /proc interface. */ -static int ipv4_ping_group_range(ctl_table *table, int write, +static int ipv4_ping_group_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -108,7 +108,7 @@ static int ipv4_ping_group_range(ctl_table *table, int write, int ret; gid_t urange[2]; kgid_t low, high; - ctl_table tmp = { + struct ctl_table tmp = { .data = &urange, .maxlen = sizeof(urange), .mode = table->mode, @@ -135,11 +135,11 @@ static int ipv4_ping_group_range(ctl_table *table, int write, return ret; } -static int proc_tcp_congestion_control(ctl_table *ctl, int write, +static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char val[TCP_CA_NAME_MAX]; - ctl_table tbl = { + struct ctl_table tbl = { .data = val, .maxlen = TCP_CA_NAME_MAX, }; @@ -153,12 +153,12 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, return ret; } -static int proc_tcp_available_congestion_control(ctl_table *ctl, +static int proc_tcp_available_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; + struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); @@ -170,12 +170,12 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, return ret; } -static int proc_allowed_congestion_control(ctl_table *ctl, +static int proc_allowed_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; + struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); @@ -190,7 +190,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int ipv4_tcp_mem(ctl_table *ctl, int write, +static int ipv4_tcp_mem(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -201,7 +201,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, struct mem_cgroup *memcg; #endif - ctl_table tmp = { + struct ctl_table tmp = { .data = &vec, .maxlen = sizeof(vec), .mode = ctl->mode, @@ -233,10 +233,11 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } -static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) { - ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; + struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; int ret; u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ab450c099aa4..46ed9afd1f5e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> +#include <net/ll_poll.h> int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -436,6 +437,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); + sock_rps_record_flow(sk); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); @@ -1551,6 +1554,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sk_buff *skb; u32 urg_hole = 0; + if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue) + && (sk->sk_state == TCP_ESTABLISHED)) + sk_poll_ll(sk, nonblock); + lock_sock(sk); err = -ENOTCONN; @@ -2875,249 +2882,9 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct tcphdr *th; - unsigned int thlen; - unsigned int seq; - __be32 delta; - unsigned int oldlen; - unsigned int mss; - struct sk_buff *gso_skb = skb; - __sum16 newcheck; - bool ooo_okay, copy_destructor; - - if (!pskb_may_pull(skb, sizeof(*th))) - goto out; - - th = tcp_hdr(skb); - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - if (!pskb_may_pull(skb, thlen)) - goto out; - - oldlen = (u16)~skb->len; - __skb_pull(skb, thlen); - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_UDP_TUNNEL | - 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - copy_destructor = gso_skb->destructor == tcp_wfree; - ooo_okay = gso_skb->ooo_okay; - /* All segments but the first should have ooo_okay cleared */ - skb->ooo_okay = 0; - - segs = skb_segment(skb, features); - if (IS_ERR(segs)) - goto out; - - /* Only first segment might have ooo_okay set */ - segs->ooo_okay = ooo_okay; - - delta = htonl(oldlen + (thlen + mss)); - - skb = segs; - th = tcp_hdr(skb); - seq = ntohl(th->seq); - - newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - - do { - th->fin = th->psh = 0; - th->check = newcheck; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = - csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - - seq += mss; - if (copy_destructor) { - skb->destructor = gso_skb->destructor; - skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; - } - skb = skb->next; - th = tcp_hdr(skb); - - th->seq = htonl(seq); - th->cwr = 0; - } while (skb->next); - - /* Following permits TCP Small Queues to work well with GSO : - * The callback to TCP stack will be called at the time last frag - * is freed at TX completion, and not right now when gso_skb - * is freed by GSO engine - */ - if (copy_destructor) { - swap(gso_skb->sk, skb->sk); - swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); - } - - delta = htonl(oldlen + (skb->tail - skb->transport_header) + - skb->data_len); - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - -out: - return segs; -} -EXPORT_SYMBOL(tcp_tso_segment); - -struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - struct sk_buff **pp = NULL; - struct sk_buff *p; - struct tcphdr *th; - struct tcphdr *th2; - unsigned int len; - unsigned int thlen; - __be32 flags; - unsigned int mss = 1; - unsigned int hlen; - unsigned int off; - int flush = 1; - int i; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*th); - th = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - hlen = off + thlen; - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - skb_gro_pull(skb, thlen); - - len = skb_gro_len(skb); - flags = tcp_flag_word(th); - - for (; (p = *head); head = &p->next) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - th2 = tcp_hdr(p); - - if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - - goto found; - } - - goto out_check_final; - -found: - flush = NAPI_GRO_CB(p)->flush; - flush |= (__force int)(flags & TCP_FLAG_CWR); - flush |= (__force int)((flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); - flush |= (__force int)(th->ack_seq ^ th2->ack_seq); - for (i = sizeof(*th); i < thlen; i += 4) - flush |= *(u32 *)((u8 *)th + i) ^ - *(u32 *)((u8 *)th2 + i); - - mss = skb_shinfo(p)->gso_size; - - flush |= (len - 1) >= mss; - flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); - - if (flush || skb_gro_receive(head, skb)) { - mss = 1; - goto out_check_final; - } - - p = *head; - th2 = tcp_hdr(p); - tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); - -out_check_final: - flush = len < mss; - flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | - TCP_FLAG_RST | TCP_FLAG_SYN | - TCP_FLAG_FIN)); - - if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) - pp = head; - -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} -EXPORT_SYMBOL(tcp_gro_receive); - -int tcp_gro_complete(struct sk_buff *skb) -{ - struct tcphdr *th = tcp_hdr(skb); - - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - - if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - - return 0; -} -EXPORT_SYMBOL(tcp_gro_complete); - #ifdef CONFIG_TCP_MD5SIG -static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; -static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_MUTEX(tcp_md5sig_mutex); static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { @@ -3132,30 +2899,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) free_percpu(pool); } -void tcp_free_md5sig_pool(void) -{ - struct tcp_md5sig_pool __percpu *pool = NULL; - - spin_lock_bh(&tcp_md5sig_pool_lock); - if (--tcp_md5sig_users == 0) { - pool = tcp_md5sig_pool; - tcp_md5sig_pool = NULL; - } - spin_unlock_bh(&tcp_md5sig_pool_lock); - if (pool) - __tcp_free_md5sig_pool(pool); -} -EXPORT_SYMBOL(tcp_free_md5sig_pool); - -static struct tcp_md5sig_pool __percpu * -__tcp_alloc_md5sig_pool(struct sock *sk) +static void __tcp_alloc_md5sig_pool(void) { int cpu; struct tcp_md5sig_pool __percpu *pool; pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) - return NULL; + return; for_each_possible_cpu(cpu) { struct crypto_hash *hash; @@ -3166,53 +2917,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk) per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } - return pool; + /* before setting tcp_md5sig_pool, we must commit all writes + * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + */ + smp_wmb(); + tcp_md5sig_pool = pool; + return; out_free: __tcp_free_md5sig_pool(pool); - return NULL; } -struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +bool tcp_alloc_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *pool; - bool alloc = false; - -retry: - spin_lock_bh(&tcp_md5sig_pool_lock); - pool = tcp_md5sig_pool; - if (tcp_md5sig_users++ == 0) { - alloc = true; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } else if (!pool) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - cpu_relax(); - goto retry; - } else - spin_unlock_bh(&tcp_md5sig_pool_lock); - - if (alloc) { - /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool __percpu *p; - - p = __tcp_alloc_md5sig_pool(sk); - spin_lock_bh(&tcp_md5sig_pool_lock); - if (!p) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - return NULL; - } - pool = tcp_md5sig_pool; - if (pool) { - /* oops, it has already been assigned. */ - spin_unlock_bh(&tcp_md5sig_pool_lock); - __tcp_free_md5sig_pool(p); - } else { - tcp_md5sig_pool = pool = p; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } + if (unlikely(!tcp_md5sig_pool)) { + mutex_lock(&tcp_md5sig_mutex); + + if (!tcp_md5sig_pool) + __tcp_alloc_md5sig_pool(); + + mutex_unlock(&tcp_md5sig_mutex); } - return pool; + return tcp_md5sig_pool != NULL; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -3229,28 +2954,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) struct tcp_md5sig_pool __percpu *p; local_bh_disable(); - - spin_lock(&tcp_md5sig_pool_lock); - p = tcp_md5sig_pool; + p = ACCESS_ONCE(tcp_md5sig_pool); if (p) - tcp_md5sig_users++; - spin_unlock(&tcp_md5sig_pool_lock); - - if (p) - return this_cpu_ptr(p); + return __this_cpu_ptr(p); local_bh_enable(); return NULL; } EXPORT_SYMBOL(tcp_get_md5sig_pool); -void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); - tcp_free_md5sig_pool(); -} -EXPORT_SYMBOL(tcp_put_md5sig_pool); - int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, const struct tcphdr *th) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c6225780bd5..46271cdcf088 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -347,24 +347,13 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) } /* 3. Tuning rcvbuf, when connection enters established state. */ - static void tcp_fixup_rcvbuf(struct sock *sk) { u32 mss = tcp_sk(sk)->advmss; - u32 icwnd = TCP_DEFAULT_INIT_RCVWND; int rcvmem; - /* Limit to 10 segments if mss <= 1460, - * or 14600/mss segments, with a minimum of two segments. - */ - if (mss > 1460) - icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - - rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER); - while (tcp_win_from_space(rcvmem) < mss) - rcvmem += 128; - - rcvmem *= icwnd; + rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * + tcp_default_init_rwnd(mss); if (sk->sk_rcvbuf < rcvmem) sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); @@ -1257,8 +1246,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = prev; - if (skb == tp->scoreboard_skb_hint) - tp->scoreboard_skb_hint = prev; if (skb == tp->lost_skb_hint) { tp->lost_skb_hint = prev; tp->lost_cnt_hint -= tcp_skb_pcount(prev); @@ -1966,20 +1953,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) return true; } -static inline int tcp_skb_timedout(const struct sock *sk, - const struct sk_buff *skb) -{ - return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; -} - -static inline int tcp_head_timedout(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - - return tp->packets_out && - tcp_skb_timedout(sk, tcp_write_queue_head(sk)); -} - /* Linux NewReno/SACK/FACK/ECN state machine. * -------------------------------------- * @@ -2086,12 +2059,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) if (tcp_dupack_heuristics(tp) > tp->reordering) return true; - /* Trick#3 : when we use RFC2988 timer restart, fast - * retransmit can be triggered by timeout of queue head. - */ - if (tcp_is_fack(tp) && tcp_head_timedout(sk)) - return true; - /* Trick#4: It is still not OK... But will it be useful to delay * recovery more? */ @@ -2128,44 +2095,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) return false; } -/* New heuristics: it is possible only after we switched to restart timer - * each time when something is ACKed. Hence, we can detect timed out packets - * during fast retransmit without falling to slow start. - * - * Usefulness of this as is very questionable, since we should know which of - * the segments is the next to timeout which is relatively expensive to find - * in general case unless we add some data structure just for that. The - * current approach certainly won't find the right one too often and when it - * finally does find _something_ it usually marks large part of the window - * right away (because a retransmission with a larger timestamp blocks the - * loop from advancing). -ij - */ -static void tcp_timeout_skbs(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if (!tcp_is_fack(tp) || !tcp_head_timedout(sk)) - return; - - skb = tp->scoreboard_skb_hint; - if (tp->scoreboard_skb_hint == NULL) - skb = tcp_write_queue_head(sk); - - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (!tcp_skb_timedout(sk, skb)) - break; - - tcp_skb_mark_lost(tp, skb); - } - - tp->scoreboard_skb_hint = skb; - - tcp_verify_left_out(tp); -} - /* Detect loss in event "A" above by marking head of queue up as lost. * For FACK or non-SACK(Reno) senders, the first "packets" number of segments * are considered lost. For RFC3517 SACK, a segment is considered lost if it @@ -2251,8 +2180,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) else if (fast_rexmit) tcp_mark_head_lost(sk, 1, 1); } - - tcp_timeout_skbs(sk); } /* CWND moderation, preventing bursts due to too big ACKs @@ -2307,10 +2234,22 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) +static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { struct tcp_sock *tp = tcp_sk(sk); + if (unmark_loss) { + struct sk_buff *skb; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; + } + tp->lost_out = 0; + tcp_clear_all_retrans_hints(tp); + } + if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2319,7 +2258,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); - if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) { + if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } @@ -2327,6 +2266,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tp->snd_cwnd_stamp = tcp_time_stamp; + tp->undo_marker = 0; } static inline bool tcp_may_undo(const struct tcp_sock *tp) @@ -2346,14 +2286,13 @@ static bool tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(sk, true); + tcp_undo_cwnd_reduction(sk, false); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS_BH(sock_net(sk), mib_idx); - tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2367,16 +2306,17 @@ static bool tcp_try_undo_recovery(struct sock *sk) } /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ -static void tcp_try_undo_dsack(struct sock *sk) +static bool tcp_try_undo_dsack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwr(sk, true); - tp->undo_marker = 0; + tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); + return true; } + return false; } /* We can clear retrans_stamp when there are no retransmissions in the @@ -2408,60 +2348,20 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } -/* Undo during fast recovery after partial ACK. */ - -static int tcp_try_undo_partial(struct sock *sk, int acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); - - if (tcp_may_undo(tp)) { - /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. - */ - if (!tcp_any_retrans_done(sk)) - tp->retrans_stamp = 0; - - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); - - DBGUNDO(sk, "Hoe"); - tcp_undo_cwr(sk, false); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); - - /* So... Do not make Hoe's retransmit yet. - * If the first packet was delayed, the rest - * ones are most probably delayed as well. - */ - failed = 0; - } - return failed; -} - /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); if (frto_undo || tcp_may_undo(tp)) { - struct sk_buff *skb; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - } - - tcp_clear_all_retrans_hints(tp); + tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); - tp->lost_out = 0; - tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - tp->undo_marker = 0; if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; @@ -2494,12 +2394,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) TCP_ECN_queue_cwr(tp); } -static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, +static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + int newly_acked_sacked = prior_unsacked - + (tp->packets_out - tp->sacked_out); tp->prr_delivered += newly_acked_sacked; if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { @@ -2556,7 +2458,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) +static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2573,7 +2475,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) tcp_moderate_cwnd(tp); } else { - tcp_cwnd_reduction(sk, newly_acked_sacked, 0); + tcp_cwnd_reduction(sk, prior_unsacked, 0); } } @@ -2731,6 +2633,40 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) tcp_xmit_retransmit_queue(sk); } +/* Undo during fast recovery after partial ACK. */ +static bool tcp_try_undo_partial(struct sock *sk, const int acked, + const int prior_unsacked) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->undo_marker && tcp_packet_delayed(tp)) { + /* Plain luck! Hole if filled with delayed + * packet, rather than with a retransmit. + */ + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + + /* We are getting evidence that the reordering degree is higher + * than we realized. If there are no retransmits out then we + * can undo. Otherwise we clock out new packets but do not + * mark more packets lost or retransmit more. + */ + if (tp->retrans_out) { + tcp_cwnd_reduction(sk, prior_unsacked, 0); + return true; + } + + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; + + DBGUNDO(sk, "partial recovery"); + tcp_undo_cwnd_reduction(sk, true); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); + tcp_try_keep_open(sk); + return true; + } + return false; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2742,15 +2678,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int prior_sacked, int prior_packets, +static void tcp_fastretrans_alert(struct sock *sk, const int acked, + const int prior_unsacked, bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && + bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2802,10 +2737,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); - } else - do_lost = tcp_try_undo_partial(sk, pkts_acked); - newly_acked_sacked = prior_packets - tp->packets_out + - tp->sacked_out - prior_sacked; + } else { + if (tcp_try_undo_partial(sk, acked, prior_unsacked)) + return; + /* Partial ACK arrived. Force fast retransmit. */ + do_lost = tcp_is_reno(tp) || + tcp_fackets_out(tp) > tp->reordering; + } + if (tcp_try_undo_dsack(sk)) { + tcp_try_keep_open(sk); + return; + } break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); @@ -2819,14 +2761,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } - newly_acked_sacked = prior_packets - tp->packets_out + - tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag, newly_acked_sacked); + tcp_try_to_open(sk, flag, prior_unsacked); return; } @@ -2846,9 +2786,9 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, fast_rexmit = 1; } - if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) + if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); + tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); } @@ -3079,7 +3019,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - tp->scoreboard_skb_hint = NULL; if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = NULL; if (skb == tp->lost_skb_hint) @@ -3333,9 +3272,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; - int prior_sacked = tp->sacked_out; - int pkts_acked = 0; - int previous_packets_out = 0; + const int prior_unsacked = tp->packets_out - tp->sacked_out; + int acked = 0; /* Number of packets newly acked */ /* If the ack is older than previous acks * then we can probably ignore it. @@ -3410,18 +3348,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - previous_packets_out = tp->packets_out; + acked = tp->packets_out; flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); - - pkts_acked = previous_packets_out - tp->packets_out; + acked -= tp->packets_out; if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); } else { if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3443,8 +3380,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3466,8 +3403,8 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); } SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -5601,6 +5538,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *req; int queued = 0; + bool acceptable; tp->rx_opt.saw_tstamp = 0; @@ -5671,157 +5609,147 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; /* step 5: check the ACK field */ - if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; - - switch (sk->sk_state) { - case TCP_SYN_RECV: - if (acceptable) { - /* Once we leave TCP_SYN_RECV, we no longer - * need req so release it. - */ - if (req) { - tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->num_retrans; - - reqsk_fastopen_remove(sk, req, false); - } else { - /* Make sure socket is routed, for - * correct metrics. - */ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_congestion_control(sk); + acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; - tcp_mtup_init(sk); - tcp_init_buffer_space(sk); - tp->copied_seq = tp->rcv_nxt; - } - smp_mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); - - /* Note, that this wakeup is only for marginal - * crossed SYN case. Passively open sockets - * are not waked up, because sk->sk_sleep == - * NULL and sk->sk_socket == NULL. - */ - if (sk->sk_socket) - sk_wake_async(sk, - SOCK_WAKE_IO, POLL_OUT); - - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << - tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - - if (req) { - /* Re-arm the timer because data may - * have been sent out. This is similar - * to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more - * aggressive and retranmitting any data - * sooner based on when they were sent - * out. - */ - tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); + switch (sk->sk_state) { + case TCP_SYN_RECV: + if (!acceptable) + return 1; - /* Prevent spurious tcp_cwnd_restart() on - * first data packet. - */ - tp->lsndtime = tcp_time_stamp; + /* Once we leave TCP_SYN_RECV, we no longer need req + * so release it. + */ + if (req) { + tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->num_retrans; - tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); - } else { - return 1; - } - break; + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for correct metrics. */ + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); + + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); + + /* Note, that this wakeup is only for marginal crossed SYN case. + * Passively open sockets are not waked up, because + * sk->sk_sleep == NULL and sk->sk_socket == NULL. + */ + if (sk->sk_socket) + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - case TCP_FIN_WAIT1: - /* If we enter the TCP_FIN_WAIT1 state and we are a - * Fast Open socket and this is the first acceptable - * ACK we have received, this would have acknowledged - * our SYNACK so stop the SYNACK timer. + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + if (req) { + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. */ - if (req != NULL) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) - return 1; - /* We no longer need the request sock. */ - reqsk_fastopen_remove(sk, req, false); - tcp_rearm_rto(sk); - } - if (tp->snd_una == tp->write_seq) { - struct dst_entry *dst; - - tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; - - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); - - if (!sock_flag(sk, SOCK_DEAD)) - /* Wake up lingering close() */ - sk->sk_state_change(sk); - else { - int tmo; - - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { - tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; - } + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); - tmo = tcp_fin_time(sk); - if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sock_owned_by_user(sk)) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. We still can lose it now, - * if it spins in bh_lock_sock(), but it is really - * marginal case. - */ - inet_csk_reset_keepalive_timer(sk, tmo); - } else { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } - } - } - break; + /* Prevent spurious tcp_cwnd_restart() on first data packet */ + tp->lsndtime = tcp_time_stamp; - case TCP_CLOSING: - if (tp->snd_una == tp->write_seq) { - tcp_time_wait(sk, TCP_TIME_WAIT, 0); - goto discard; - } + tcp_initialize_rcv_mss(sk); + tcp_fast_path_on(tp); + break; + + case TCP_FIN_WAIT1: { + struct dst_entry *dst; + int tmo; + + /* If we enter the TCP_FIN_WAIT1 state and we are a + * Fast Open socket and this is the first acceptable + * ACK we have received, this would have acknowledged + * our SYNACK so stop the SYNACK timer. + */ + if (req != NULL) { + /* Return RST if ack_seq is invalid. + * Note that RFC793 only says to generate a + * DUPACK for it but for TCP Fast Open it seems + * better to treat this case like TCP_SYN_RECV + * above. + */ + if (!acceptable) + return 1; + /* We no longer need the request sock. */ + reqsk_fastopen_remove(sk, req, false); + tcp_rearm_rto(sk); + } + if (tp->snd_una != tp->write_seq) break; - case TCP_LAST_ACK: - if (tp->snd_una == tp->write_seq) { - tcp_update_metrics(sk); - tcp_done(sk); - goto discard; - } + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; + + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); + + if (!sock_flag(sk, SOCK_DEAD)) { + /* Wake up lingering close() */ + sk->sk_state_change(sk); break; } + + if (tp->linger2 < 0 || + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + tcp_done(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + return 1; + } + + tmo = tcp_fin_time(sk); + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + } else if (th->fin || sock_owned_by_user(sk)) { + /* Bad case. We could lose such FIN otherwise. + * It is not a big problem, but it looks confusing + * and not so rare event. We still can lose it now, + * if it spins in bh_lock_sock(), but it is really + * marginal case. + */ + inet_csk_reset_keepalive_timer(sk, tmo); + } else { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto discard; + } + break; + } + + case TCP_CLOSING: + if (tp->snd_una == tp->write_seq) { + tcp_time_wait(sk, TCP_TIME_WAIT, 0); + goto discard; + } + break; + + case TCP_LAST_ACK: + if (tp->snd_una == tp->write_seq) { + tcp_update_metrics(sk); + tcp_done(sk); + goto discard; + } + break; } /* step 6: check the URG bit */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 719652305a29..1063bb83e342 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -75,6 +75,7 @@ #include <net/netdma.h> #include <net/secure_seq.h> #include <net/tcp_memcontrol.h> +#include <net/ll_poll.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -545,8 +546,7 @@ out: sock_put(sk); } -static void __tcp_v4_send_check(struct sk_buff *skb, - __be32 saddr, __be32 daddr) +void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct tcphdr *th = tcp_hdr(skb); @@ -571,23 +571,6 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); -int tcp_v4_gso_send_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - iph = ip_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v4_send_check(skb, iph->saddr, iph->daddr); - return 0; -} - /* * This routine will send an RST to the other tcp. * @@ -1026,7 +1009,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = sock_kmalloc(sk, sizeof(*key), gfp); if (!key) return -ENOMEM; - if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { + if (!tcp_alloc_md5sig_pool()) { sock_kfree_s(sk, key, sizeof(*key)); return -ENOMEM; } @@ -1044,9 +1027,7 @@ EXPORT_SYMBOL(tcp_md5_do_add); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { - struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (!key) @@ -1054,10 +1035,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); - md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); - if (hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); return 0; } EXPORT_SYMBOL(tcp_md5_do_del); @@ -1071,8 +1048,6 @@ static void tcp_clear_md5_list(struct sock *sk) md5sig = rcu_dereference_protected(tp->md5sig_info, 1); - if (!hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); @@ -2019,6 +1994,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_ll(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); @@ -2803,52 +2779,6 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - __sum16 sum; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } -flush: - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - - return tcp_gro_receive(head, skb); -} - -int tcp4_gro_complete(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), - iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - - return tcp_gro_complete(skb); -} - struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0f0178827259..ab1c08658528 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -317,7 +317,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) key = tp->af_specific->md5_lookup(sk, sk); if (key != NULL) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL) + if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) BUG(); } } while (0); @@ -358,10 +358,8 @@ void tcp_twsk_destructor(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) { - tcp_free_md5sig_pool(); + if (twsk->tw_md5_key) kfree_rcu(twsk->tw_md5_key, rcu); - } #endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c new file mode 100644 index 000000000000..3a7525e6c086 --- /dev/null +++ b/net/ipv4/tcp_offload.c @@ -0,0 +1,332 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TCPv4 GSO/GRO support + */ + +#include <linux/skbuff.h> +#include <net/tcp.h> +#include <net/protocol.h> + +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct tcphdr *th; + unsigned int thlen; + unsigned int seq; + __be32 delta; + unsigned int oldlen; + unsigned int mss; + struct sk_buff *gso_skb = skb; + __sum16 newcheck; + bool ooo_okay, copy_destructor; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + oldlen = (u16)~skb->len; + __skb_pull(skb, thlen); + + mss = tcp_skb_mss(skb); + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + SKB_GSO_GRE | + SKB_GSO_MPLS | + SKB_GSO_UDP_TUNNEL | + 0) || + !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + copy_destructor = gso_skb->destructor == tcp_wfree; + ooo_okay = gso_skb->ooo_okay; + /* All segments but the first should have ooo_okay cleared */ + skb->ooo_okay = 0; + + segs = skb_segment(skb, features); + if (IS_ERR(segs)) + goto out; + + /* Only first segment might have ooo_okay set */ + segs->ooo_okay = ooo_okay; + + delta = htonl(oldlen + (thlen + mss)); + + skb = segs; + th = tcp_hdr(skb); + seq = ntohl(th->seq); + + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + + do { + th->fin = th->psh = 0; + th->check = newcheck; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = + csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); + + seq += mss; + if (copy_destructor) { + skb->destructor = gso_skb->destructor; + skb->sk = gso_skb->sk; + /* {tcp|sock}_wfree() use exact truesize accounting : + * sum(skb->truesize) MUST be exactly be gso_skb->truesize + * So we account mss bytes of 'true size' for each segment. + * The last segment will contain the remaining. + */ + skb->truesize = mss; + gso_skb->truesize -= mss; + } + skb = skb->next; + th = tcp_hdr(skb); + + th->seq = htonl(seq); + th->cwr = 0; + } while (skb->next); + + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (copy_destructor) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + + delta = htonl(oldlen + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + + skb->data_len); + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); +out: + return segs; +} +EXPORT_SYMBOL(tcp_tso_segment); + +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int len; + unsigned int thlen; + __be32 flags; + unsigned int mss = 1; + unsigned int hlen; + unsigned int off; + int flush = 1; + int i; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + skb_gro_pull(skb, thlen); + + len = skb_gro_len(skb); + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= (__force int)(flags & TCP_FLAG_CWR); + flush |= (__force int)((flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); + for (i = sizeof(*th); i < thlen; i += 4) + flush |= *(u32 *)((u8 *)th + i) ^ + *(u32 *)((u8 *)th2 + i); + + mss = tcp_skb_mss(p); + + flush |= (len - 1) >= mss; + flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = len < mss; + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | + TCP_FLAG_RST | TCP_FLAG_SYN | + TCP_FLAG_FIN)); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(tcp_gro_receive); + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} +EXPORT_SYMBOL(tcp_gro_complete); + +static int tcp_v4_gso_send_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + th->check = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v4_send_check(skb, iph->saddr, iph->daddr); + return 0; +} + +static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + + case CHECKSUM_NONE: + wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), IPPROTO_TCP, 0); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + return tcp_gro_receive(head, skb); +} + +static int tcp4_gro_complete(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} + +static const struct net_offload tcpv4_offload = { + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, +}; + +int __init tcpv4_offload_init(void) +{ + return inet_add_offload(&tcpv4_offload, IPPROTO_TCP); +} diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ec335fabd5cc..3dd46eab3b05 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -181,6 +181,21 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } + +u32 tcp_default_init_rwnd(u32 mss) +{ + /* Initial receive window should be twice of TCP_INIT_CWND to + * enable proper sending of new unset data during fast recovery + * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a + * limit when mss is larger than 1460. + */ + u32 init_rwnd = TCP_INIT_CWND * 2; + + if (mss > 1460) + init_rwnd = max((1460 * init_rwnd) / mss, 2U); + return init_rwnd; +} + /* Determine a window scaling and initial window to offer. * Based on the assumption that the given amount of space * will be offered. Store the results in the tp structure. @@ -230,22 +245,10 @@ void tcp_select_initial_window(int __space, __u32 mss, } } - /* Set initial window to a value enough for senders starting with - * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place - * a limit on the initial window when mss is larger than 1460. - */ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = TCP_DEFAULT_INIT_RCVWND; - if (mss > 1460) - init_cwnd = - max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - /* when initializing use the value from init_rcv_wnd - * rather than the default from above - */ - if (init_rcv_wnd) - *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); - else - *rcv_wnd = min(*rcv_wnd, init_cwnd * mss); + if (!init_rcv_wnd) /* Use default unless specified otherwise */ + init_rcv_wnd = tcp_default_init_rwnd(mss); + *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); } /* Set the clamp no higher than max representable value */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03c..959502afd8d9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -109,6 +109,7 @@ #include <trace/events/udp.h> #include <linux/static_key.h> #include <trace/events/skb.h> +#include <net/ll_poll.h> #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -429,7 +430,7 @@ begin: reuseport = sk->sk_reuseport; if (reuseport) { hash = inet_ehashfn(net, daddr, hnum, - saddr, htons(sport)); + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -510,7 +511,7 @@ begin: reuseport = sk->sk_reuseport; if (reuseport) { hash = inet_ehashfn(net, daddr, hnum, - saddr, htons(sport)); + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -1709,7 +1710,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = udp_queue_rcv_skb(sk, skb); + int ret; + + sk_mark_ll(sk, skb); + ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1967,6 +1971,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; + sock_rps_record_flow(sk); + /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) @@ -2284,29 +2290,8 @@ void __init udp_init(void) sysctl_udp_wmem_min = SK_MEM_QUANTUM; } -int udp4_ufo_send_check(struct sk_buff *skb) -{ - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - return -EINVAL; - - if (likely(!skb->encapsulation)) { - const struct iphdr *iph; - struct udphdr *uh; - - iph = ip_hdr(skb); - uh = udp_hdr(skb); - - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - } - return 0; -} - -static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); int mac_len = skb->mac_len; @@ -2365,53 +2350,3 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, out: return segs; } - -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || - !(type & (SKB_GSO_UDP)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Fragment the skb. IP headers of the fragments are updated in - * inet_gso_segment() - */ - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) - segs = skb_udp_tunnel_segment(skb, features); - else { - int offset; - __wsum csum; - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - segs = skb_segment(skb, features); - } -out: - return segs; -} diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c new file mode 100644 index 000000000000..f35eccaa855e --- /dev/null +++ b/net/ipv4/udp_offload.c @@ -0,0 +1,100 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * UDPv4 GSO support + */ + +#include <linux/skbuff.h> +#include <net/udp.h> +#include <net/protocol.h> + +static int udp4_ufo_send_check(struct sk_buff *skb) +{ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + return -EINVAL; + + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; + + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } + + return 0; +} + +static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_GRE | SKB_GSO_MPLS) || + !(type & (SKB_GSO_UDP)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + /* Fragment the skb. IP headers of the fragments are updated in + * inet_gso_segment() + */ + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } +out: + return segs; +} + +static const struct net_offload udpv4_offload = { + .callbacks = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, + }, +}; + +int __init udpv4_offload_init(void) +{ + return inet_add_offload(&udpv4_offload, IPPROTO_UDP); +} diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 05a5df2febc9..06347dbd32c1 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -63,7 +63,7 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 2, + .priority = 3, }; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 9af088d2cdaa..470a9c008e9b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1bbf744c2cc3..80449121afa2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1126,8 +1126,7 @@ retry: ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, + ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_scope(&addr), addr_flags) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); @@ -2402,6 +2401,7 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, + const struct in6_addr *peer_pfx, unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { @@ -2457,6 +2457,8 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = jiffies; + if (peer_pfx) + ifp->peer_addr = *peer_pfx; spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, @@ -2526,7 +2528,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL, ireq.ifr6_prefixlen, IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); @@ -2826,9 +2828,9 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) } static int addrconf_notify(struct notifier_block *this, unsigned long event, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *) data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; @@ -3612,18 +3614,20 @@ restart: rcu_read_unlock_bh(); } -static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) +static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, + struct in6_addr **peer_pfx) { struct in6_addr *pfx = NULL; + *peer_pfx = NULL; + if (addr) pfx = nla_data(addr); if (local) { if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) - pfx = NULL; - else - pfx = nla_data(local); + *peer_pfx = pfx; + pfx = nla_data(local); } return pfx; @@ -3641,7 +3645,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; + struct in6_addr *pfx, *peer_pfx; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3649,7 +3653,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (pfx == NULL) return -EINVAL; @@ -3707,7 +3711,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; + struct in6_addr *pfx, *peer_pfx; struct inet6_ifaddr *ifa; struct net_device *dev; u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; @@ -3719,7 +3723,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (pfx == NULL) return -EINVAL; @@ -3747,7 +3751,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) * It would be best to check for !NLM_F_CREATE here but * userspace alreay relies on not having to provide this. */ - return inet6_addr_add(net, ifm->ifa_index, pfx, + return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, ifm->ifa_prefixlen, ifa_flags, preferred_lft, valid_lft); } @@ -3804,6 +3808,7 @@ static inline int rt_scope(int ifa_scope) static inline int inet6_ifaddr_msgsize(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16) /* IFA_LOCAL */ + nla_total_size(16) /* IFA_ADDRESS */ + nla_total_size(sizeof(struct ifa_cacheinfo)); } @@ -3842,13 +3847,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || - put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } + if (!ipv6_addr_any(&ifa->peer_addr)) { + if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || + nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) + goto error; + } else + if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0) + goto error; + + if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + goto error; return nlmsg_end(skb, nlh); + +error: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, @@ -4048,7 +4062,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *addr = NULL; + struct in6_addr *addr = NULL, *peer; struct net_device *dev = NULL; struct inet6_ifaddr *ifa; struct sk_buff *skb; @@ -4058,7 +4072,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); if (addr == NULL) { err = -EINVAL; goto errout; @@ -4566,11 +4580,26 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); + if (!ipv6_addr_any(&ifp->peer_addr)) + addrconf_prefix_route(&ifp->peer_addr, 128, + ifp->idev->dev, 0, 0); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); + if (!ipv6_addr_any(&ifp->peer_addr)) { + struct rt6_info *rt; + struct net_device *dev = ifp->idev->dev; + + rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL, + dev->ifindex, 1); + if (rt) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } + } dst_hold(&ifp->rt->dst); if (ip6_del_rt(ifp->rt)) @@ -4591,13 +4620,13 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL static -int addrconf_sysctl_forward(ctl_table *ctl, int write, +int addrconf_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - ctl_table lctl; + struct ctl_table lctl; int ret; /* @@ -4618,13 +4647,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, static void dev_disable_change(struct inet6_dev *idev) { + struct netdev_notifier_info info; + if (!idev || !idev->dev) return; + netdev_notifier_info_init(&info, idev->dev); if (idev->cnf.disable_ipv6) - addrconf_notify(NULL, NETDEV_DOWN, idev->dev); + addrconf_notify(NULL, NETDEV_DOWN, &info); else - addrconf_notify(NULL, NETDEV_UP, idev->dev); + addrconf_notify(NULL, NETDEV_UP, &info); } static void addrconf_disable_change(struct net *net, __s32 newf) @@ -4673,13 +4705,13 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) } static -int addrconf_sysctl_disable(ctl_table *ctl, int write, +int addrconf_sysctl_disable(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - ctl_table lctl; + struct ctl_table lctl; int ret; /* @@ -4701,7 +4733,7 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write, static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table addrconf_vars[DEVCONF_MAX+1]; + struct ctl_table addrconf_vars[DEVCONF_MAX+1]; } addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ab5c7ad482cd..a5ac969aeefe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> +#include <net/ping.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> @@ -840,6 +841,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_udplite_proto; + err = proto_register(&pingv6_prot, 1); + if (err) + goto out_unregister_ping_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. @@ -930,6 +934,10 @@ static int __init inet6_init(void) if (err) goto ipv6_packet_fail; + err = pingv6_init(); + if (err) + goto pingv6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -942,6 +950,8 @@ out: sysctl_fail: ipv6_packet_cleanup(); #endif +pingv6_fail: + pingv6_exit(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -985,6 +995,8 @@ register_pernet_fail: rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); +out_unregister_ping_proto: + proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4b56cbbc7890..197e6f4a2b74 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -879,3 +879,30 @@ exit_f: return err; } EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); + +void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, + __u16 srcp, __u16 destp, int bucket) +{ + struct ipv6_pinfo *np = inet6_sk(sp); + const struct in6_addr *dest, *src; + + dest = &np->daddr; + src = &np->rcv_saddr; + seq_printf(seq, + "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", + bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp, + sp->sk_state, + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, + sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops)); +} diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index c5e83fae4df4..140748debc4a 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL(ipv6_skip_exthdr); int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) { const unsigned char *nh = skb_network_header(skb); - int packet_len = skb->tail - skb->network_header; + int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); struct ipv6_opt_hdr *hdr; int len; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4ff0a42b8c7..7cfc8d284870 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -57,6 +57,7 @@ #include <net/ipv6.h> #include <net/ip6_checksum.h> +#include <net/ping.h> #include <net/protocol.h> #include <net/raw.h> #include <net/rawv6.h> @@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net) static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ + struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, 0, 0); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); + + if (!(type & ICMPV6_INFOMSG_MASK)) + if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) + ping_err(skb, offset, info); } static int icmpv6_rcv(struct sk_buff *skb); @@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -391,7 +399,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int err = 0; if ((u8 *)hdr < skb->head || - (skb->network_header + sizeof(*hdr)) > skb->tail) + (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; /* @@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto csum_error; } @@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - /* we couldn't care less */ + ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: @@ -967,7 +976,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -ctl_table ipv6_icmp_table_template[] = { +struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 71b766ee821d..a263b990ee11 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fcf..583e8d435f9a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1319,7 +1319,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr6_table *mrt; struct mif_device *v; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bfa6cc36ef2a..72c8bfe06bb4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1409,8 +1409,9 @@ static void mld_sendpack(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); - mldlen = skb->tail - skb->transport_header; + payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - + sizeof(*pip6); + mldlen = skb_tail_pointer(skb) - skb_transport_header(skb); pip6->payload_len = htons(payload_len); pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 0f9bdc5ee9f3..9ac01dc9402e 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -268,7 +268,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; @@ -404,7 +405,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ca4ffcc287f1..b3b5730b48c5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -693,7 +693,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -853,7 +853,7 @@ static void ndisc_recv_na(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -1069,7 +1069,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) __u8 * opt = (__u8 *)(ra_msg + 1); - optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); + optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - + sizeof(struct ra_msg); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); @@ -1346,7 +1347,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) u8 *hdr; struct ndisc_options ndopts; struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); #ifdef CONFIG_IPV6_NDISC_NODETYPE @@ -1568,7 +1569,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct inet6_dev *idev; diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 60e9053bab05..47bff6107519 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -71,7 +71,7 @@ static int device_cmp(struct nf_conn *ct, void *ifindex) static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) @@ -89,8 +89,10 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; + struct netdev_notifier_info info; - return masq_device_event(this, event, ifa->idev->dev); + netdev_notifier_info_init(&info, ifa->idev->dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_inet_notifier = { diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index c2e73e647e44..ab92a3673fbb 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -40,7 +40,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) u16 offset = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c new file mode 100644 index 000000000000..2b52046e1263 --- /dev/null +++ b/net/ipv6/ping.c @@ -0,0 +1,272 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * "Ping" sockets + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ipv4/ping.c code. + * + * Authors: Lorenzo Colitti (IPv6 support) + * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6), + * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32) + * + */ + +#include <net/addrconf.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/protocol.h> +#include <net/udp.h> +#include <net/transp_v6.h> +#include <net/ping.h> + +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + + +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + return -EAFNOSUPPORT; +} +static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} +static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +{ + return -EAFNOSUPPORT; +} +static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) +{ + return 0; +} + +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct icmp6hdr user_icmph; + int addr_type; + struct in6_addr *daddr; + int iif = 0; + struct flowi6 fl6; + int err; + int hlimit; + struct dst_entry *dst; + struct rt6_info *rt; + struct pingfakehdr pfh; + + pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + + if (msg->msg_name) { + struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + if (msg->msg_namelen < sizeof(struct sockaddr_in6) || + u->sin6_family != AF_INET6) { + return -EINVAL; + } + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != u->sin6_scope_id) { + return -EINVAL; + } + daddr = &(u->sin6_addr); + iif = u->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } + + if (!iif) + iif = sk->sk_bound_dev_if; + + addr_type = ipv6_addr_type(daddr); + if (__ipv6_addr_needs_scope_id(addr_type) && !iif) + return -EINVAL; + if (addr_type & IPV6_ADDR_MAPPED) + return -EINVAL; + + /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + + memset(&fl6, 0, sizeof(fl6)); + + fl6.flowi6_proto = IPPROTO_ICMPV6; + fl6.saddr = np->saddr; + fl6.daddr = *daddr; + fl6.fl6_icmp_type = user_icmph.icmp6_type; + fl6.fl6_icmp_code = user_icmph.icmp6_code; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); + if (!np) + return -EBADF; + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + pfh.icmph.type = user_icmph.icmp6_type; + pfh.icmph.code = user_icmph.icmp6_code; + pfh.icmph.checksum = 0; + pfh.icmph.un.echo.id = inet->inet_sport; + pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; + pfh.iov = msg->msg_iov; + pfh.wcheck = 0; + pfh.family = AF_INET6; + + if (ipv6_addr_is_multicast(&fl6.daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + + err = ip6_append_data(sk, ping_getfrag, &pfh, len, + 0, hlimit, + np->tclass, NULL, &fl6, rt, + MSG_DONTWAIT, np->dontfrag); + + if (err) { + ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *) &pfh.icmph, + len); + } + + return err; +} + +#ifdef CONFIG_PROC_FS +static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET6); +} + +static int ping_v6_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct ping_iter_state *) seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } + return 0; +} + +static struct ping_seq_afinfo ping_v6_seq_afinfo = { + .name = "icmp6", + .family = AF_INET6, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v6_seq_start, + .show = ping_v6_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; + +static int __net_init ping_v6_proc_init_net(struct net *net) +{ + return ping_proc_register(net, &ping_v6_seq_afinfo); +} + +static void __net_init ping_v6_proc_exit_net(struct net *net) +{ + return ping_proc_unregister(net, &ping_v6_seq_afinfo); +} + +static struct pernet_operations ping_v6_net_ops = { + .init = ping_v6_proc_init_net, + .exit = ping_v6_proc_exit_net, +}; +#endif + +int __init pingv6_init(void) +{ +#ifdef CONFIG_PROC_FS + int ret = register_pernet_subsys(&ping_v6_net_ops); + if (ret) + return ret; +#endif + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. + */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; +#ifdef CONFIG_PROC_FS + unregister_pernet_subsys(&ping_v6_net_ops); +#endif + inet6_unregister_protosw(&pingv6_protosw); +} diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index eedff8ccded5..c45f7a5c36e9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1132,7 +1132,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) - amount = skb->tail - skb->transport_header; + amount = skb_tail_pointer(skb) - + skb_transport_header(skb); spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } @@ -1226,45 +1227,16 @@ struct proto rawv6_prot = { }; #ifdef CONFIG_PROC_FS -static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) -{ - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = 0; - srcp = inet_sk(sp)->inet_num; - seq_printf(seq, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - i, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); -} - static int raw6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + struct sock *sp = v; + __u16 srcp = inet_sk(sp)->inet_num; + ip6_dgram_sock_seq_show(seq, v, srcp, 0, + raw_seq_private(seq)->bucket); + } return 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad0aa6b0b86a..7ca87b37c0ef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1649,7 +1649,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu int optlen, on_link; u8 *lladdr; - optlen = skb->tail - skb->transport_header; + optlen = skb_tail_pointer(skb) - skb_transport_header(skb); optlen -= sizeof(*msg); if (optlen < 0) { @@ -2681,9 +2681,9 @@ errout: } static int ip6_route_dev_notify(struct notifier_block *this, - unsigned long event, void *data) + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { @@ -2790,7 +2790,7 @@ static const struct file_operations rt6_stats_seq_fops = { #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, +int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net; @@ -2805,7 +2805,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, return 0; } -ctl_table ipv6_route_table_template[] = { +struct ctl_table ipv6_route_table_template[] = { { .procname = "flush", .data = &init_net.ipv6.sysctl.flush_delay, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 335363478bbf..6b9c1f128eaf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -577,6 +577,10 @@ static int ipip6_rcv(struct sk_buff *skb) if (tunnel != NULL) { struct pcpu_tstats *tstats; + if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && + tunnel->parms.iph.protocol != 0) + goto out; + secpath_reset(skb); skb->mac_header = skb->network_header; skb_reset_network_header(skb); @@ -629,6 +633,35 @@ out: return 0; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. */ + .proto = htons(ETH_P_IP), +}; + +static int ipip_rcv(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct ip_tunnel *tunnel; + + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, + iph->saddr, iph->daddr); + if (tunnel != NULL) { + if (tunnel->parms.iph.protocol != IPPROTO_IPIP && + tunnel->parms.iph.protocol != 0) + goto drop; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + } + + return 1; + +drop: + kfree_skb(skb); + return 0; +} + /* * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function * stores the embedded IPv4 address in v4dst and returns true. @@ -877,6 +910,43 @@ tx_error: return NETDEV_TX_OK; } +static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tiph = &tunnel->parms.iph; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); + return NETDEV_TX_OK; +} + +static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + ipip_tunnel_xmit(skb, dev); + break; + case htons(ETH_P_IPV6): + ipip6_tunnel_xmit(skb, dev); + break; + default: + goto tx_err; + } + + return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + +} + static void ipip6_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; @@ -1027,7 +1097,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || + if (p.iph.protocol != IPPROTO_IPV6 && + p.iph.protocol != IPPROTO_IPIP && + p.iph.protocol != 0) + goto done; + if (p.iph.version != 4 || p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) goto done; if (p.iph.ttl) @@ -1164,7 +1238,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, - .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, @@ -1232,6 +1306,22 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + u8 proto; + + if (!data) + return 0; + + proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (proto != IPPROTO_IPV6 && + proto != IPPROTO_IPIP && + proto != 0) + return -EINVAL; + + return 0; +} + static void ipip6_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms) { @@ -1268,6 +1358,10 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_FLAGS]) parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + } #ifdef CONFIG_IPV6_SIT_6RD @@ -1391,6 +1485,8 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + #ifdef CONFIG_IPV6_SIT_6RD /* IFLA_IPTUN_6RD_PREFIX */ nla_total_size(sizeof(struct in6_addr)) + @@ -1416,6 +1512,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; @@ -1445,6 +1542,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, #ifdef CONFIG_IPV6_SIT_6RD [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, @@ -1459,6 +1557,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipip6_tunnel_setup, + .validate = ipip6_validate, .newlink = ipip6_newlink, .changelink = ipip6_changelink, .get_size = ipip6_get_size, @@ -1471,6 +1570,12 @@ static struct xfrm_tunnel sit_handler __read_mostly = { .priority = 1, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = ipip_rcv, + .err_handler = ipip6_err, + .priority = 2, +}; + static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { int prio; @@ -1553,6 +1658,7 @@ static void __exit sit_cleanup(void) { rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ @@ -1569,9 +1675,14 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - pr_info("%s: can't add protocol\n", __func__); + pr_info("%s: can't register ip6ip4\n", __func__); goto xfrm_tunnel_failed; } + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: can't register ip4ip4\n", __func__); + goto xfrm_tunnel4_failed; + } err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1580,6 +1691,8 @@ out: return err; rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm_tunnel_failed: unregister_pernet_device(&sit_net_ops); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index e85c48bd404f..107b2f1d90ae 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,7 +16,7 @@ #include <net/addrconf.h> #include <net/inet_frag.h> -static ctl_table ipv6_table_template[] = { +static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, @@ -27,7 +27,7 @@ static ctl_table ipv6_table_template[] = { { } }; -static ctl_table ipv6_rotable[] = { +static struct ctl_table ipv6_rotable[] = { { .procname = "mld_max_msf", .data = &sysctl_mld_max_msf, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf39..5cffa5c3e6b8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -63,6 +63,7 @@ #include <net/inet_common.h> #include <net/secure_seq.h> #include <net/tcp_memcontrol.h> +#include <net/ll_poll.h> #include <asm/uaccess.h> @@ -1498,6 +1499,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_ll(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa6..f77e34c5a0e2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,6 +46,7 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/inet6_hashtables.h> +#include <net/ll_poll.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -841,7 +842,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = udpv6_queue_rcv_skb(sk, skb); + int ret; + + sk_mark_ll(sk, skb); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1359,48 +1363,17 @@ static const struct inet6_protocol udpv6_protocol = { /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS - -static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = ntohs(inet->inet_dport); - srcp = ntohs(inet->inet_sport); - seq_printf(seq, - "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - bucket, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops)); -} - int udp6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct udp_iter_state *)seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } return 0; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index d3cfaf9c7a08..5d1b8d7ac993 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -64,7 +64,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || + SKB_GSO_GRE | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f547a47d381c..7a1e0fc1bd4d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -330,7 +330,7 @@ static __inline__ void __ipxitf_put(struct ipx_interface *intrfc) static int ipxitf_device_event(struct notifier_block *notifier, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ipx_interface *i, *tmp; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index de73f6496db5..d6a59651767a 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -73,7 +73,7 @@ static int min_lap_keepalive_time = 100; /* 100us */ /* For other sysctl, I've no idea of the range. Maybe Dag could help * us on that - Jean II */ -static int do_devname(ctl_table *table, int write, +static int do_devname(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -90,7 +90,7 @@ static int do_devname(ctl_table *table, int write, } -static int do_discovery(ctl_table *table, int write, +static int do_discovery(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -111,7 +111,7 @@ static int do_discovery(ctl_table *table, int write, } /* One file */ -static ctl_table irda_table[] = { +static struct ctl_table irda_table[] = { { .procname = "discovery", .data = &sysctl_discovery, diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ae691651b721..168aff5e60de 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2293,7 +2293,7 @@ out_unlock: static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *event_dev = (struct net_device *)ptr; + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); struct sock *sk; struct iucv_sock *iucv; diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 0785e95c9924..be7614b9ed27 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -85,7 +85,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, *cpos++ = *pos++ ^ e[i]; } - for (i = 0; i < CCMP_MIC_LEN; i++) + for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) mic[i] = b[i] ^ s_0[i]; } @@ -123,7 +123,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, crypto_cipher_encrypt_one(tfm, a, a); } - for (i = 0; i < CCMP_MIC_LEN; i++) { + for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) { if ((mic[i] ^ s_0[i]) != a[i]) return -1; } @@ -138,7 +138,7 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]) tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN); + crypto_cipher_setkey(tfm, key, WLAN_KEY_LEN_CCMP); return tfm; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4fdb306e42e0..a1c6e1ceede8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -73,16 +73,19 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; if (ieee80211_sdata_running(sdata)) { + u32 mask = MONITOR_FLAG_COOK_FRAMES | + MONITOR_FLAG_ACTIVE; + /* - * Prohibit MONITOR_FLAG_COOK_FRAMES to be - * changed while the interface is up. + * Prohibit MONITOR_FLAG_COOK_FRAMES and + * MONITOR_FLAG_ACTIVE to be changed while the + * interface is up. * Else we would need to add a lot of cruft * to update everything: * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ - if ((*flags & MONITOR_FLAG_COOK_FRAMES) != - (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) + if ((*flags & mask) != (sdata->u.mntr_flags & mask)) return -EBUSY; ieee80211_adjust_monitor_flags(sdata, -1); @@ -444,7 +447,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct timespec uptime; u64 packets = 0; - int ac; + int i, ac; sinfo->generation = sdata->local->sta_generation; @@ -488,6 +491,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->signal = (s8)sta->last_signal; sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } + if (sta->chains) { + sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | + STATION_INFO_CHAIN_SIGNAL_AVG; + + sinfo->chains = sta->chains; + for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { + sinfo->chain_signal[i] = sta->chain_signal_last[i]; + sinfo->chain_signal_avg[i] = + (s8) -ewma_read(&sta->chain_signal_avg[i]); + } + } sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); sta_set_rate_info_rx(sta, &sinfo->rxrate); @@ -728,7 +742,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy, if (sset == ETH_SS_STATS) { sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats); - memcpy(data, *ieee80211_gstrings_sta_stats, sz_sta_stats); + memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats); } drv_get_et_strings(sdata, sset, &(data[sz_sta_stats])); } @@ -1741,6 +1755,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; ifmsh->user_mpm = setup->user_mpm; + ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; @@ -2312,7 +2327,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode old_req; int err; - lockdep_assert_held(&sdata->u.mgd.mtx); + lockdep_assert_held(&sdata->wdev.mtx); old_req = sdata->u.mgd.req_smps; sdata->u.mgd.req_smps = smps_mode; @@ -2369,9 +2384,9 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 14abcf44f974..cafe614ef93d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -228,9 +228,9 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); err = __ieee80211_request_smps(sdata, smps_mode); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); return err; } @@ -313,16 +313,16 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); if (!sdata->u.mgd.associated) { - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); dev_kfree_skb(skb); return -ENOTCONN; } memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, addr, ETH_ALEN); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); break; default: dev_kfree_skb(skb); @@ -471,6 +471,8 @@ __IEEE80211_IF_FILE_W(tsf); IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); #ifdef CONFIG_MAC80211_MESH +IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); + /* Mesh stats attributes */ IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC); IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); @@ -480,7 +482,6 @@ IEEE80211_IF_FILE(dropped_frames_congestion, u.mesh.mshstats.dropped_frames_congestion, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, u.mesh.mshstats.dropped_frames_no_route, DEC); -IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); /* Mesh parameters */ IEEE80211_IF_FILE(dot11MeshMaxRetries, @@ -583,6 +584,7 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_mesh_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD_MODE(tsf, 0600); + DEBUGFS_ADD_MODE(estab_plinks, 0400); } static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) @@ -598,7 +600,6 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) MESHSTATS_ADD(dropped_frames_ttl); MESHSTATS_ADD(dropped_frames_no_route); MESHSTATS_ADD(dropped_frames_congestion); - MESHSTATS_ADD(estab_plinks); #undef MESHSTATS_ADD } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 169664c122e2..b931c96a596f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -146,7 +146,8 @@ static inline int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)))) + !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF) && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; trace_drv_add_interface(local, sdata); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index af8cee06e4f3..75dff338f581 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -429,9 +429,9 @@ void ieee80211_request_smps_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.request_smps_work); - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); } void ieee80211_request_smps(struct ieee80211_vif *vif, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 170f9a7fa319..caa4b4f7f6e4 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -54,7 +54,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; int frame_len; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); /* Reset own TSF to allow time synchronization work. */ drv_reset_tsf(local, sdata); @@ -74,7 +74,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&ifibss->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifibss->presp, NULL); if (presp) kfree_rcu(presp, rcu_head); @@ -263,7 +263,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, const struct cfg80211_bss_ies *ies; u64 tsf; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); if (beacon_int < 10) beacon_int = 10; @@ -341,6 +341,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; int band; /* @@ -380,8 +381,9 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, sta->last_rx = jiffies; /* make sure mandatory rates are always added */ + sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); return ieee80211_ibss_finish_sta(sta, auth); } @@ -408,7 +410,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); if (len < 24 + 6) return; @@ -492,7 +494,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, prev_rates = sta->sta.supp_rates[band]; /* make sure mandatory rates are always added */ sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); if (sta->sta.supp_rates[band] != prev_rates) { ibss_dbg(sdata, @@ -624,6 +626,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; int band; /* @@ -658,8 +661,9 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, sta->last_rx = jiffies; /* make sure mandatory rates are always added */ + sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); spin_lock(&ifibss->incomplete_lock); list_add(&sta->list, &ifibss->incomplete_stations); @@ -673,7 +677,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) int active = 0; struct sta_info *sta; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); rcu_read_lock(); @@ -699,7 +703,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); @@ -730,7 +734,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) u16 capability; int i; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); @@ -773,7 +777,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) int active_ibss; u16 capability; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); active_ibss = ieee80211_sta_active_ibss(sdata); ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); @@ -843,10 +847,10 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; u8 *pos, *end; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&ifibss->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || len < 24 + 2 || !presp) @@ -930,7 +934,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - mutex_lock(&sdata->u.ibss.mtx); + sdata_lock(sdata); if (!sdata->u.ibss.ssid_len) goto mgmt_out; /* not ready to merge yet */ @@ -953,7 +957,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } mgmt_out: - mutex_unlock(&sdata->u.ibss.mtx); + sdata_unlock(sdata); } void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) @@ -961,7 +965,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct sta_info *sta; - mutex_lock(&ifibss->mtx); + sdata_lock(sdata); /* * Work could be scheduled after scan or similar @@ -997,7 +1001,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) } out: - mutex_unlock(&ifibss->mtx); + sdata_unlock(sdata); } static void ieee80211_ibss_timer(unsigned long data) @@ -1014,7 +1018,6 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); - mutex_init(&ifibss->mtx); INIT_LIST_HEAD(&ifibss->incomplete_stations); spin_lock_init(&ifibss->incomplete_lock); } @@ -1041,8 +1044,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, { u32 changed = 0; - mutex_lock(&sdata->u.ibss.mtx); - if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); sdata->u.ibss.fixed_bssid = true; @@ -1075,8 +1076,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); sdata->u.ibss.ssid_len = params->ssid_len; - mutex_unlock(&sdata->u.ibss.mtx); - /* * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is * reserved, but an HT STA shall protect HT transmissions as though @@ -1112,8 +1111,6 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) struct sta_info *sta; struct beacon_data *presp; - mutex_lock(&sdata->u.ibss.mtx); - active_ibss = ieee80211_sta_active_ibss(sdata); if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { @@ -1157,7 +1154,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) /* remove beacon */ kfree(sdata->u.ibss.ie); presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&sdata->u.ibss.mtx)); + lockdep_is_held(&sdata->wdev.mtx)); RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; @@ -1173,7 +1170,5 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.ibss.timer); - mutex_unlock(&sdata->u.ibss.mtx); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9ca8e3278cc0..923e1772e8f3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -394,7 +394,6 @@ struct ieee80211_if_managed { bool nullfunc_failed; bool connection_loss; - struct mutex mtx; struct cfg80211_bss *associated; struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; @@ -488,8 +487,6 @@ struct ieee80211_if_managed { struct ieee80211_if_ibss { struct timer_list timer; - struct mutex mtx; - unsigned long last_scan_completed; u32 basic_rates; @@ -580,8 +577,6 @@ struct ieee80211_if_mesh { bool accepting_plinks; int num_gates; struct beacon_data __rcu *beacon; - /* just protects beacon updates for now */ - struct mutex mtx; const u8 *ie; u8 ie_len; enum { @@ -778,6 +773,26 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } +static inline void sdata_lock(struct ieee80211_sub_if_data *sdata) + __acquires(&sdata->wdev.mtx) +{ + mutex_lock(&sdata->wdev.mtx); + __acquire(&sdata->wdev.mtx); +} + +static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata) + __releases(&sdata->wdev.mtx) +{ + mutex_unlock(&sdata->wdev.mtx); + __release(&sdata->wdev.mtx); +} + +static inline void +sdata_assert_lock(struct ieee80211_sub_if_data *sdata) +{ + lockdep_assert_held(&sdata->wdev.mtx); +} + static inline enum ieee80211_band ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata) { @@ -1507,9 +1522,6 @@ static inline void ieee802_11_parse_elems(const u8 *start, size_t len, ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); } -u32 ieee80211_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band); - void ieee80211_dynamic_ps_enable_work(struct work_struct *work); void ieee80211_dynamic_ps_disable_work(struct work_struct *work); void ieee80211_dynamic_ps_timer(unsigned long data); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 98d20c0f6fed..cc117591f678 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -159,7 +159,8 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr) +static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, + bool check_dup) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *iter; @@ -180,13 +181,16 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr) ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); + if (!check_dup) + return ret; mutex_lock(&local->iflist_mtx); list_for_each_entry(iter, &local->interfaces, list) { if (iter == sdata) continue; - if (iter->vif.type == NL80211_IFTYPE_MONITOR) + if (iter->vif.type == NL80211_IFTYPE_MONITOR && + !(iter->u.mntr_flags & MONITOR_FLAG_ACTIVE)) continue; m = iter->vif.addr; @@ -208,12 +212,17 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sockaddr *sa = addr; + bool check_dup = true; int ret; if (ieee80211_sdata_running(sdata)) return -EBUSY; - ret = ieee80211_verify_mac(sdata, sa->sa_data); + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + check_dup = false; + + ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup); if (ret) return ret; @@ -545,7 +554,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; } - if (local->monitors == 0 && local->open_count == 0) { + if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + res = drv_add_interface(local, sdata); + if (res) + goto err_stop; + } else if (local->monitors == 0 && local->open_count == 0) { res = ieee80211_add_virtual_monitor(local); if (res) goto err_stop; @@ -923,7 +936,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->mtx); ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - break; + + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + break; + + /* fall through */ default: if (going_down) drv_remove_interface(local, sdata); @@ -1072,7 +1089,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, }; @@ -1747,10 +1764,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } static int netdev_notify(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ieee80211_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 67059b88fea5..e39cc91d0cf1 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -335,12 +335,12 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, switch (cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: - key->conf.iv_len = WEP_IV_LEN; - key->conf.icv_len = WEP_ICV_LEN; + key->conf.iv_len = IEEE80211_WEP_IV_LEN; + key->conf.icv_len = IEEE80211_WEP_ICV_LEN; break; case WLAN_CIPHER_SUITE_TKIP: - key->conf.iv_len = TKIP_IV_LEN; - key->conf.icv_len = TKIP_ICV_LEN; + key->conf.iv_len = IEEE80211_TKIP_IV_LEN; + key->conf.icv_len = IEEE80211_TKIP_ICV_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS; i++) { key->u.tkip.rx[i].iv32 = @@ -352,13 +352,13 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, spin_lock_init(&key->u.tkip.txlock); break; case WLAN_CIPHER_SUITE_CCMP: - key->conf.iv_len = CCMP_HDR_LEN; - key->conf.icv_len = CCMP_MIC_LEN; + key->conf.iv_len = IEEE80211_CCMP_HDR_LEN; + key->conf.icv_len = IEEE80211_CCMP_MIC_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) - for (j = 0; j < CCMP_PN_LEN; j++) + for (j = 0; j < IEEE80211_CCMP_PN_LEN; j++) key->u.ccmp.rx_pn[i][j] = - seq[CCMP_PN_LEN - j - 1]; + seq[IEEE80211_CCMP_PN_LEN - j - 1]; } /* * Initialize AES key state here as an optimization so that @@ -375,9 +375,9 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); if (seq) - for (j = 0; j < CMAC_PN_LEN; j++) + for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = - seq[CMAC_PN_LEN - j - 1]; + seq[IEEE80211_CMAC_PN_LEN - j - 1]; /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -740,13 +740,13 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.ccmp.rx_pn[tid]; - memcpy(seq->ccmp.pn, pn, CCMP_PN_LEN); + memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; - memcpy(seq->aes_cmac.pn, pn, CMAC_PN_LEN); + memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; } } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index e8de3e6d7804..036d57e76a5e 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -19,17 +19,6 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 -#define WEP_IV_LEN 4 -#define WEP_ICV_LEN 4 -#define ALG_CCMP_KEY_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 -#define TKIP_IV_LEN 8 -#define TKIP_ICV_LEN 4 -#define CMAC_PN_LEN 6 - struct ieee80211_local; struct ieee80211_sub_if_data; struct sta_info; @@ -93,13 +82,13 @@ struct ieee80211_key { * frames and the last counter is used with Robust * Management frames. */ - u8 rx_pn[IEEE80211_NUM_TIDS + 1][CCMP_PN_LEN]; + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_CCMP_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCCMPReplays */ } ccmp; struct { atomic64_t tx_pn; - u8 rx_pn[CMAC_PN_LEN]; + u8 rx_pn[IEEE80211_CMAC_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8a7bfc47d577..1998f1475267 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -331,7 +331,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, return NOTIFY_DONE; ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); /* Copy the addresses to the bss_conf list */ ifa = idev->ifa_list; @@ -349,7 +349,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return NOTIFY_DONE; } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6952760881c8..b3d1fdd46368 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,8 +161,11 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->plink_timer); } - if (changed) + if (changed) { + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); + } } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -577,7 +580,9 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + @@ -697,25 +702,21 @@ out_free: } static int -ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) +ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata) { struct beacon_data *old_bcn; int ret; - mutex_lock(&ifmsh->mtx); - - old_bcn = rcu_dereference_protected(ifmsh->beacon, - lockdep_is_held(&ifmsh->mtx)); - ret = ieee80211_mesh_build_beacon(ifmsh); + old_bcn = rcu_dereference_protected(sdata->u.mesh.beacon, + lockdep_is_held(&sdata->wdev.mtx)); + ret = ieee80211_mesh_build_beacon(&sdata->u.mesh); if (ret) /* just reuse old beacon */ - goto out; + return ret; if (old_bcn) kfree_rcu(old_bcn, rcu_head); -out: - mutex_unlock(&ifmsh->mtx); - return ret; + return 0; } void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, @@ -726,7 +727,7 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT))) - if (ieee80211_mesh_rebuild_beacon(&sdata->u.mesh)) + if (ieee80211_mesh_rebuild_beacon(sdata)) return; ieee80211_bss_info_change_notify(sdata, changed); } @@ -741,6 +742,8 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband = + sdata->local->hw.wiphy->bands[band]; local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -748,7 +751,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_configure_filter(local); ifmsh->mesh_cc_id = 0; /* Disabled */ - ifmsh->mesh_auth_id = 0; /* Disabled */ /* register sync ops from extensible synchronization framework */ ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id); ifmsh->adjusting_tbtt = false; @@ -759,8 +761,7 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; sdata->vif.bss_conf.enable_beacon = true; - sdata->vif.bss_conf.basic_rates = - ieee80211_mandatory_rates(local, band); + sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sband); changed |= ieee80211_mps_local_status_update(sdata); @@ -788,12 +789,12 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - mutex_lock(&ifmsh->mtx); + sdata_lock(sdata); bcn = rcu_dereference_protected(ifmsh->beacon, - lockdep_is_held(&ifmsh->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); - mutex_unlock(&ifmsh->mtx); + sdata_unlock(sdata); /* flush STAs and mpaths on this iface */ sta_info_flush(sdata); @@ -1041,7 +1042,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); RCU_INIT_POINTER(ifmsh->beacon, NULL); - mutex_init(&ifmsh->mtx); sdata->vif.bss_conf.bssid = zero_addr; } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 09bebed99416..6c4da99bc4fb 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -517,7 +517,9 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); } static void mesh_plink_timer(unsigned long data) @@ -1068,6 +1070,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - if (changed) + if (changed) { + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); + } } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 741448b30825..118540b16729 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -91,41 +91,6 @@ MODULE_PARM_DESC(probe_wait_ms, #define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 /* - * All cfg80211 functions have to be called outside a locked - * section so that they can acquire a lock themselves... This - * is much simpler than queuing up things in cfg80211, but we - * do need some indirection for that here. - */ -enum rx_mgmt_action { - /* no action required */ - RX_MGMT_NONE, - - /* caller must call cfg80211_send_deauth() */ - RX_MGMT_CFG80211_DEAUTH, - - /* caller must call cfg80211_send_disassoc() */ - RX_MGMT_CFG80211_DISASSOC, - - /* caller must call cfg80211_send_rx_auth() */ - RX_MGMT_CFG80211_RX_AUTH, - - /* caller must call cfg80211_send_rx_assoc() */ - RX_MGMT_CFG80211_RX_ASSOC, - - /* caller must call cfg80211_send_assoc_timeout() */ - RX_MGMT_CFG80211_ASSOC_TIMEOUT, - - /* used when a processed beacon causes a deauth */ - RX_MGMT_CFG80211_TX_DEAUTH, -}; - -/* utils */ -static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) -{ - lockdep_assert_held(&ifmgd->mtx); -} - -/* * We can have multiple work items (and connection probing) * scheduling this timer, but we need to take care to only * reschedule it when it should fire _earlier_ than it was @@ -135,13 +100,14 @@ static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) * has happened -- the work that runs from this timer will * do that. */ -static void run_again(struct ieee80211_if_managed *ifmgd, unsigned long timeout) +static void run_again(struct ieee80211_sub_if_data *sdata, + unsigned long timeout) { - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); - if (!timer_pending(&ifmgd->timer) || - time_before(timeout, ifmgd->timer.expires)) - mod_timer(&ifmgd->timer, timeout); + if (!timer_pending(&sdata->u.mgd.timer) || + time_before(timeout, sdata->u.mgd.timer.expires)) + mod_timer(&sdata->u.mgd.timer, timeout); } void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) @@ -652,7 +618,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_channel *chan; u32 rates = 0; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); @@ -962,7 +928,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ieee80211_sdata_running(sdata)) return; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) goto out; @@ -985,7 +951,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) IEEE80211_QUEUE_STOP_REASON_CSA); out: ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) @@ -1036,7 +1002,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (!cbss) return; @@ -1390,6 +1356,9 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL)) return false; + if (!sdata->vif.bss_conf.dtim_period) + return false; + rcu_read_lock(); sta = sta_info_get(sdata, mgd->bssid); if (sta) @@ -1842,7 +1811,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; u32 changed = 0; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (WARN_ON_ONCE(tx && !frame_buf)) return; @@ -2051,7 +2020,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) } ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); - run_again(ifmgd, ifmgd->probe_timeout); + run_again(sdata, ifmgd->probe_timeout); if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) ieee80211_flush_queues(sdata->local, sdata); } @@ -2065,7 +2034,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (!ieee80211_sdata_running(sdata)) return; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) goto out; @@ -2119,7 +2088,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, ifmgd->probe_send_count = 0; ieee80211_mgd_probe_ap_send(sdata); out: - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, @@ -2135,7 +2104,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) return NULL; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (ifmgd->associated) cbss = ifmgd->associated; @@ -2168,9 +2137,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } @@ -2181,13 +2150,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - mutex_unlock(&ifmgd->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. - */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + sdata_unlock(sdata); } static void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -2254,7 +2219,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if (!assoc) { sta_info_destroy_addr(sdata, auth_data->bss->bssid); @@ -2295,27 +2260,26 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, auth_data->key_idx, tx_flags); } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 bssid[ETH_ALEN]; u16 auth_alg, auth_transaction, status_code; struct sta_info *sta; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 6) - return RX_MGMT_NONE; + return; if (!ifmgd->auth_data || ifmgd->auth_data->done) - return RX_MGMT_NONE; + return; memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN); if (!ether_addr_equal(bssid, mgmt->bssid)) - return RX_MGMT_NONE; + return; auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); @@ -2327,14 +2291,15 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, mgmt->sa, auth_alg, ifmgd->auth_data->algorithm, auth_transaction, ifmgd->auth_data->expected_transaction); - return RX_MGMT_NONE; + return; } if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; } switch (ifmgd->auth_data->algorithm) { @@ -2347,20 +2312,20 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->auth_data->expected_transaction != 4) { ieee80211_auth_challenge(sdata, mgmt, len); /* need another frame */ - return RX_MGMT_NONE; + return; } break; default: WARN_ONCE(1, "invalid auth alg %d", ifmgd->auth_data->algorithm); - return RX_MGMT_NONE; + return; } sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && ifmgd->auth_data->expected_transaction != 2) { @@ -2368,7 +2333,8 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, * Report auth frame to user space for processing since another * round of Authentication frames is still needed. */ - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; } /* move station state to auth */ @@ -2384,30 +2350,29 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&sdata->local->sta_mtx); - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; out_err: mutex_unlock(&sdata->local->sta_mtx); /* ignore frame -- wait for timeout */ - return RX_MGMT_NONE; } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *bssid = NULL; u16 reason_code; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 2) - return RX_MGMT_NONE; + return; if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; bssid = ifmgd->associated->bssid; @@ -2418,25 +2383,24 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - return RX_MGMT_CFG80211_DEAUTH; + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, len); } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 2) - return RX_MGMT_NONE; + return; if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -2445,7 +2409,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - return RX_MGMT_CFG80211_DISASSOC; + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, len); } static void ieee80211_get_rates(struct ieee80211_supported_band *sband, @@ -2495,7 +2459,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if (!assoc) { sta_info_destroy_addr(sdata, assoc_data->bss->bssid); @@ -2749,10 +2713,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, return ret; } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - struct cfg80211_bss **bss) +static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data; @@ -2760,13 +2723,14 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; u8 *pos; bool reassoc; + struct cfg80211_bss *bss; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (!assoc_data) - return RX_MGMT_NONE; + return; if (!ether_addr_equal(assoc_data->bss->bssid, mgmt->bssid)) - return RX_MGMT_NONE; + return; /* * AssocResp and ReassocResp have identical structure, so process both @@ -2774,7 +2738,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, */ if (len < 24 + 6) - return RX_MGMT_NONE; + return; reassoc = ieee80211_is_reassoc_req(mgmt->frame_control); capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); @@ -2801,22 +2765,23 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, assoc_data->timeout = jiffies + msecs_to_jiffies(ms); assoc_data->timeout_started = true; if (ms > IEEE80211_ASSOC_TIMEOUT) - run_again(ifmgd, assoc_data->timeout); - return RX_MGMT_NONE; + run_again(sdata, assoc_data->timeout); + return; } - *bss = assoc_data->bss; + bss = assoc_data->bss; if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); } else { - if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { + if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(sdata->local->hw.wiphy, *bss); - return RX_MGMT_CFG80211_ASSOC_TIMEOUT; + cfg80211_put_bss(sdata->local->hw.wiphy, bss); + cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); + return; } sdata_info(sdata, "associated\n"); @@ -2828,7 +2793,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_destroy_assoc_data(sdata, true); } - return RX_MGMT_CFG80211_RX_ASSOC; + cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, len); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -2842,7 +2807,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; bool need_ps = false; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if ((sdata->u.mgd.associated && ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || @@ -2901,7 +2866,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ifmgd = &sdata->u.mgd; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (!ether_addr_equal(mgmt->da, sdata->vif.addr)) return; /* ignore ProbeResp to foreign address */ @@ -2926,7 +2891,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); } } @@ -2951,10 +2916,9 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_HT_CAPABILITY) | (1ULL << WLAN_EID_HT_OPERATION); -static enum rx_mgmt_action -ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - u8 *deauth_buf, struct ieee80211_rx_status *rx_status) +static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; @@ -2969,24 +2933,25 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, u8 erp_value = 0; u32 ncrc; u8 *bssid; + u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; if (baselen > len) - return RX_MGMT_NONE; + return; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); - return RX_MGMT_NONE; + return; } if (rx_status->freq != chanctx_conf->def.chan->center_freq) { rcu_read_unlock(); - return RX_MGMT_NONE; + return; } chan = chanctx_conf->def.chan; rcu_read_unlock(); @@ -3013,13 +2978,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; - run_again(ifmgd, ifmgd->assoc_data->timeout); - return RX_MGMT_NONE; + run_again(sdata, ifmgd->assoc_data->timeout); + return; } if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; bssid = ifmgd->associated->bssid; /* Track average RSSI from the Beacon frames of the current AP */ @@ -3165,7 +3130,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) - return RX_MGMT_NONE; + return; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; @@ -3199,6 +3164,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } changed |= BSS_CHANGED_DTIM_PERIOD; + ieee80211_recalc_ps_vif(sdata); } if (elems.erp_info) { @@ -3220,7 +3186,9 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); - return RX_MGMT_CFG80211_TX_DEAUTH; + cfg80211_send_deauth(sdata->dev, deauth_buf, + sizeof(deauth_buf)); + return; } if (sta && elems.opmode_notif) @@ -3237,19 +3205,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); - - return RX_MGMT_NONE; } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; - struct cfg80211_bss *bss = NULL; - enum rx_mgmt_action rma = RX_MGMT_NONE; - u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; u16 fc; struct ieee802_11_elems elems; int ies_len; @@ -3258,28 +3220,27 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_BEACON: - rma = ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - deauth_buf, rx_status); + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, skb); break; case IEEE80211_STYPE_AUTH: - rma = ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DEAUTH: - rma = ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DISASSOC: - rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ASSOC_RESP: case IEEE80211_STYPE_REASSOC_RESP: - rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss); + ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { @@ -3325,34 +3286,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } break; } - mutex_unlock(&ifmgd->mtx); - - switch (rma) { - case RX_MGMT_NONE: - /* no action */ - break; - case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_DISASSOC: - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_RX_AUTH: - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_RX_ASSOC: - cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_ASSOC_TIMEOUT: - cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); - break; - case RX_MGMT_CFG80211_TX_DEAUTH: - cfg80211_send_deauth(sdata->dev, deauth_buf, - sizeof(deauth_buf)); - break; - default: - WARN(1, "unexpected: %d", rma); - } + sdata_unlock(sdata); } static void ieee80211_sta_timer(unsigned long data) @@ -3366,20 +3300,12 @@ static void ieee80211_sta_timer(unsigned long data) static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, u8 *bssid, u8 reason, bool tx) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, tx, frame_buf); - mutex_unlock(&ifmgd->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. - */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - - mutex_lock(&ifmgd->mtx); } static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) @@ -3389,7 +3315,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; u32 tx_flags = 0; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; @@ -3462,7 +3388,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, auth_data->timeout); + run_again(sdata, auth_data->timeout); } else { auth_data->timeout_started = false; } @@ -3475,7 +3401,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; struct ieee80211_local *local = sdata->local; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); assoc_data->tries++; if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) { @@ -3499,7 +3425,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; assoc_data->timeout_started = true; - run_again(&sdata->u.mgd, assoc_data->timeout); + run_again(sdata, assoc_data->timeout); } else { assoc_data->timeout_started = false; } @@ -3524,7 +3450,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (ifmgd->status_received) { __le16 fc = ifmgd->status_fc; @@ -3536,7 +3462,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (status_acked) { ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); } else { ifmgd->auth_data->timeout = jiffies - 1; } @@ -3547,7 +3473,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (status_acked) { ifmgd->assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT; - run_again(ifmgd, ifmgd->assoc_data->timeout); + run_again(sdata, ifmgd->assoc_data->timeout); } else { ifmgd->assoc_data->timeout = jiffies - 1; } @@ -3570,12 +3496,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_auth_data(sdata, false); - mutex_unlock(&ifmgd->mtx); cfg80211_send_auth_timeout(sdata->dev, bssid); - mutex_lock(&ifmgd->mtx); } } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { @@ -3588,12 +3512,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_assoc_data(sdata, false); - mutex_unlock(&ifmgd->mtx); cfg80211_send_assoc_timeout(sdata->dev, bssid); - mutex_lock(&ifmgd->mtx); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) - run_again(ifmgd, ifmgd->assoc_data->timeout); + run_again(sdata, ifmgd->assoc_data->timeout); if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL) && @@ -3627,7 +3549,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) false); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) - run_again(ifmgd, ifmgd->probe_timeout); + run_again(sdata, ifmgd->probe_timeout); else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { mlme_dbg(sdata, "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", @@ -3656,7 +3578,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) } } - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } static void ieee80211_sta_bcn_mon_timer(unsigned long data) @@ -3717,9 +3639,9 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } @@ -3730,10 +3652,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) ifmgd->associated->bssid, WLAN_REASON_UNSPECIFIED, true); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } #endif @@ -3765,8 +3687,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len; ifmgd->p2p_noa_index = -1; - mutex_init(&ifmgd->mtx); - if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC; else @@ -4122,8 +4042,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* try to authenticate/probe */ - mutex_lock(&ifmgd->mtx); - if ((ifmgd->auth_data && !ifmgd->auth_data->done) || ifmgd->assoc_data) { err = -EBUSY; @@ -4143,8 +4061,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - __cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4161,8 +4079,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* hold our own reference */ cfg80211_ref_bss(local->hw.wiphy, auth_data->bss); - err = 0; - goto out_unlock; + return 0; err_clear: memset(ifmgd->bssid, 0, ETH_ALEN); @@ -4170,9 +4087,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, ifmgd->auth_data = NULL; err_free: kfree(auth_data); - out_unlock: - mutex_unlock(&ifmgd->mtx); - return err; } @@ -4203,8 +4117,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->ssid_len = ssidie[1]; rcu_read_unlock(); - mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -4212,8 +4124,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - __cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); } if (ifmgd->auth_data && !ifmgd->auth_data->done) { @@ -4407,7 +4319,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } rcu_read_unlock(); - run_again(ifmgd, assoc_data->timeout); + run_again(sdata, assoc_data->timeout); if (bss->corrupt_data) { char *corrupt_type = "data"; @@ -4423,17 +4335,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, corrupt_type); } - err = 0; - goto out; + return 0; err_clear: memset(ifmgd->bssid, 0, ETH_ALEN); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->assoc_data = NULL; err_free: kfree(assoc_data); - out: - mutex_unlock(&ifmgd->mtx); - return err; } @@ -4445,8 +4353,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, bool tx = !req->local_state_change; bool report_frame = false; - mutex_lock(&ifmgd->mtx); - sdata_info(sdata, "deauthenticating from %pM by local choice (reason=%d)\n", req->bssid, req->reason_code); @@ -4458,7 +4364,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); ieee80211_destroy_auth_data(sdata, false); - mutex_unlock(&ifmgd->mtx); report_frame = true; goto out; @@ -4470,12 +4375,11 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); report_frame = true; } - mutex_unlock(&ifmgd->mtx); out: if (report_frame) - __cfg80211_send_deauth(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_send_deauth(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4487,18 +4391,14 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, u8 bssid[ETH_ALEN]; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - mutex_lock(&ifmgd->mtx); - /* * cfg80211 should catch this ... but it's racy since * we can receive a disassoc frame, process it, hand it * to cfg80211 while that's in a locked section already * trying to tell us that the user wants to disconnect. */ - if (ifmgd->associated != req->bss) { - mutex_unlock(&ifmgd->mtx); + if (ifmgd->associated != req->bss) return -ENOLINK; - } sdata_info(sdata, "disassociating from %pM by local choice (reason=%d)\n", @@ -4508,10 +4408,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC, req->reason_code, !req->local_state_change, frame_buf); - mutex_unlock(&ifmgd->mtx); - __cfg80211_send_disassoc(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_send_disassoc(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4531,13 +4430,13 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->csa_connection_drop_work); cancel_work_sync(&ifmgd->chswitch_work); - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, false); if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); del_timer_sync(&ifmgd->timer); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8e2952620256..bdd7b4a719e9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -258,6 +258,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; if (status->flag & RX_FLAG_HT) { + unsigned int stbc; + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); *pos++ = local->hw.radiotap_mcs_details; *pos = 0; @@ -267,6 +269,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos |= IEEE80211_RADIOTAP_MCS_BW_40; if (status->flag & RX_FLAG_HT_GF) *pos |= IEEE80211_RADIOTAP_MCS_FMT_GF; + stbc = (status->flag & RX_FLAG_STBC_MASK) >> RX_FLAG_STBC_SHIFT; + *pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT; pos++; *pos++ = status->rate_idx; } @@ -1372,6 +1376,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + int i; if (!sta) return RX_CONTINUE; @@ -1422,6 +1427,19 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) ewma_add(&sta->avg_signal, -status->signal); } + if (status->chains) { + sta->chains = status->chains; + for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { + int signal = status->chain_signal[i]; + + if (!(status->chains & BIT(i))) + continue; + + sta->chain_signal_last[i] = signal; + ewma_add(&sta->chain_signal_avg[i], -signal); + } + } + /* * Change STA power saving mode only at the end of a frame * exchange sequence. @@ -1608,7 +1626,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) entry->ccmp = 1; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], - CCMP_PN_LEN); + IEEE80211_CCMP_PN_LEN); } return RX_QUEUED; } @@ -1627,21 +1645,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * (IEEE 802.11i, 8.3.3.4.5) */ if (entry->ccmp) { int i; - u8 pn[CCMP_PN_LEN], *rpn; + u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) return RX_DROP_UNUSABLE; - memcpy(pn, entry->last_pn, CCMP_PN_LEN); - for (i = CCMP_PN_LEN - 1; i >= 0; i--) { + memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); + for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { pn[i]++; if (pn[i]) break; } queue = rx->security_idx; rpn = rx->key->u.ccmp.rx_pn[queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN)) + if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) return RX_DROP_UNUSABLE; - memcpy(entry->last_pn, pn, CCMP_PN_LEN); + memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); } skb_pull(rx->skb, ieee80211_hdrlen(fc)); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 11216bc13b27..a04c5671d7fd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -358,6 +358,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, do_posix_clock_monotonic_gettime(&uptime); sta->last_connected = uptime.tv_sec; ewma_init(&sta->avg_signal, 1024, 8); + for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) + ewma_init(&sta->chain_signal_avg[i], 1024, 8); if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index adc30045f99e..41c28b977f7c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -344,6 +344,11 @@ struct sta_info { int last_signal; struct ewma avg_signal; int last_ack_signal; + + u8 chains; + s8 chain_signal_last[IEEE80211_MAX_CHAINS]; + struct ewma chain_signal_avg[IEEE80211_MAX_CHAINS]; + /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9972e07a2f96..34be9336b5d1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -398,13 +398,14 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_has_order(hdr->frame_control)) return TX_CONTINUE; + if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = tx->sdata->vif.cab_queue; + /* no stations in PS mode */ if (!atomic_read(&ps->num_sta_ps)) return TX_CONTINUE; info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; - if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) - info->hw_queue = tx->sdata->vif.cab_queue; /* device releases frame after DTIM beacon */ if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 72e6292955bb..c75d3db2a31c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -560,6 +560,9 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + continue; + break; case NL80211_IFTYPE_AP_VLAN: continue; default: @@ -598,6 +601,9 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + continue; + break; case NL80211_IFTYPE_AP_VLAN: continue; default: @@ -1072,32 +1078,6 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata, true); } -u32 ieee80211_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *bitrates; - u32 mandatory_rates; - enum ieee80211_rate_flags mandatory_flag; - int i; - - sband = local->hw.wiphy->bands[band]; - if (WARN_ON(!sband)) - return 1; - - if (band == IEEE80211_BAND_2GHZ) - mandatory_flag = IEEE80211_RATE_MANDATORY_B; - else - mandatory_flag = IEEE80211_RATE_MANDATORY_A; - - bitrates = sband->bitrates; - mandatory_rates = 0; - for (i = 0; i < sband->n_bitrates; i++) - if (bitrates[i].flags & mandatory_flag) - mandatory_rates |= BIT(i); - return mandatory_rates; -} - void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *da, @@ -1607,9 +1587,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->u.mgd.dtim_period) changed |= BSS_CHANGED_DTIM_PERIOD; - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); ieee80211_bss_info_change_notify(sdata, changed); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); break; case NL80211_IFTYPE_ADHOC: changed |= BSS_CHANGED_IBSS; diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index c04d401dae92..6ee2b5863572 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -28,7 +28,7 @@ int ieee80211_wep_init(struct ieee80211_local *local) { /* start WEP IV from a random value */ - get_random_bytes(&local->wep_iv, WEP_IV_LEN); + get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN); local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_tx_tfm)) { @@ -98,20 +98,21 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN || - skb_headroom(skb) < WEP_IV_LEN)) + if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN || + skb_headroom(skb) < IEEE80211_WEP_IV_LEN)) return NULL; hdrlen = ieee80211_hdrlen(hdr->frame_control); - newhdr = skb_push(skb, WEP_IV_LEN); - memmove(newhdr, newhdr + WEP_IV_LEN, hdrlen); + newhdr = skb_push(skb, IEEE80211_WEP_IV_LEN); + memmove(newhdr, newhdr + IEEE80211_WEP_IV_LEN, hdrlen); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return newhdr + hdrlen; - skb_set_network_header(skb, skb_network_offset(skb) + WEP_IV_LEN); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_WEP_IV_LEN); ieee80211_wep_get_iv(local, keylen, keyidx, newhdr + hdrlen); return newhdr + hdrlen; } @@ -125,8 +126,8 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local, unsigned int hdrlen; hdrlen = ieee80211_hdrlen(hdr->frame_control); - memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, WEP_IV_LEN); + memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_WEP_IV_LEN); } @@ -146,7 +147,7 @@ int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key, put_unaligned(icv, (__le32 *)(data + data_len)); crypto_cipher_setkey(tfm, rc4key, klen); - for (i = 0; i < data_len + WEP_ICV_LEN; i++) + for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++) crypto_cipher_encrypt_one(tfm, data + i, data + i); return 0; @@ -172,7 +173,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, if (!iv) return -1; - len = skb->len - (iv + WEP_IV_LEN - skb->data); + len = skb->len - (iv + IEEE80211_WEP_IV_LEN - skb->data); /* Prepend 24-bit IV to RC4 key */ memcpy(rc4key, iv, 3); @@ -181,10 +182,10 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, memcpy(rc4key + 3, key, keylen); /* Add room for ICV */ - skb_put(skb, WEP_ICV_LEN); + skb_put(skb, IEEE80211_WEP_ICV_LEN); return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, - iv + WEP_IV_LEN, len); + iv + IEEE80211_WEP_IV_LEN, len); } @@ -201,11 +202,11 @@ int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key, return -1; crypto_cipher_setkey(tfm, rc4key, klen); - for (i = 0; i < data_len + WEP_ICV_LEN; i++) + for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++) crypto_cipher_decrypt_one(tfm, data + i, data + i); crc = cpu_to_le32(~crc32_le(~0, data, data_len)); - if (memcmp(&crc, data + data_len, WEP_ICV_LEN) != 0) + if (memcmp(&crc, data + data_len, IEEE80211_WEP_ICV_LEN) != 0) /* ICV mismatch */ return -1; @@ -237,10 +238,10 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, return -1; hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (skb->len < hdrlen + WEP_IV_LEN + WEP_ICV_LEN) + if (skb->len < hdrlen + IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN) return -1; - len = skb->len - hdrlen - WEP_IV_LEN - WEP_ICV_LEN; + len = skb->len - hdrlen - IEEE80211_WEP_IV_LEN - IEEE80211_WEP_ICV_LEN; keyidx = skb->data[hdrlen + 3] >> 6; @@ -256,16 +257,16 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, memcpy(rc4key + 3, key->conf.key, key->conf.keylen); if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, - skb->data + hdrlen + WEP_IV_LEN, - len)) + skb->data + hdrlen + + IEEE80211_WEP_IV_LEN, len)) ret = -1; /* Trim ICV */ - skb_trim(skb, skb->len - WEP_ICV_LEN); + skb_trim(skb, skb->len - IEEE80211_WEP_ICV_LEN); /* Remove IV */ - memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, WEP_IV_LEN); + memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_WEP_IV_LEN); return ret; } @@ -305,13 +306,14 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { - if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + WEP_IV_LEN)) + if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + + IEEE80211_WEP_IV_LEN)) return RX_DROP_UNUSABLE; if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) rx->sta->wep_weak_iv_count++; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - if (pskb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN)) + if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) return RX_DROP_UNUSABLE; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index c7c6d644486f..c9edfcb7a13b 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -62,10 +62,10 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) tail = MICHAEL_MIC_LEN; if (!info->control.hw_key) - tail += TKIP_ICV_LEN; + tail += IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < TKIP_IV_LEN)) + skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return TX_DROP; key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; @@ -198,15 +198,16 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = TKIP_ICV_LEN; + tail = IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < TKIP_IV_LEN)) + skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return -1; - pos = skb_push(skb, TKIP_IV_LEN); - memmove(pos, pos + TKIP_IV_LEN, hdrlen); - skb_set_network_header(skb, skb_network_offset(skb) + TKIP_IV_LEN); + pos = skb_push(skb, IEEE80211_TKIP_IV_LEN); + memmove(pos, pos + IEEE80211_TKIP_IV_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_TKIP_IV_LEN); pos += hdrlen; /* the HW only needs room for the IV, but not the actual IV */ @@ -227,7 +228,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; /* Add room for ICV */ - skb_put(skb, TKIP_ICV_LEN); + skb_put(skb, IEEE80211_TKIP_ICV_LEN); return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, key, skb, pos, len); @@ -290,11 +291,11 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* Trim ICV */ - skb_trim(skb, skb->len - TKIP_ICV_LEN); + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); /* Remove IV */ - memmove(skb->data + TKIP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, TKIP_IV_LEN); + memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_TKIP_IV_LEN); return RX_CONTINUE; } @@ -337,9 +338,9 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, else qos_tid = 0; - data_len = skb->len - hdrlen - CCMP_HDR_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN; if (encrypted) - data_len -= CCMP_MIC_LEN; + data_len -= IEEE80211_CCMP_MIC_LEN; /* First block, b_0 */ b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ @@ -348,7 +349,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, */ b_0[1] = qos_tid | (mgmt << 4); memcpy(&b_0[2], hdr->addr2, ETH_ALEN); - memcpy(&b_0[8], pn, CCMP_PN_LEN); + memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); /* l(m) */ put_unaligned_be16(data_len, &b_0[14]); @@ -424,15 +425,16 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = CCMP_MIC_LEN; + tail = IEEE80211_CCMP_MIC_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < CCMP_HDR_LEN)) + skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN)) return -1; - pos = skb_push(skb, CCMP_HDR_LEN); - memmove(pos, pos + CCMP_HDR_LEN, hdrlen); - skb_set_network_header(skb, skb_network_offset(skb) + CCMP_HDR_LEN); + pos = skb_push(skb, IEEE80211_CCMP_HDR_LEN); + memmove(pos, pos + IEEE80211_CCMP_HDR_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_CCMP_HDR_LEN); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && @@ -457,10 +459,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) return 0; - pos += CCMP_HDR_LEN; + pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, scratch, 0); ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len, - pos, skb_put(skb, CCMP_MIC_LEN)); + pos, skb_put(skb, IEEE80211_CCMP_MIC_LEN)); return 0; } @@ -490,7 +492,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - u8 pn[CCMP_PN_LEN]; + u8 pn[IEEE80211_CCMP_PN_LEN]; int data_len; int queue; @@ -500,12 +502,13 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) !ieee80211_is_robust_mgmt_frame(hdr)) return RX_CONTINUE; - data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - + IEEE80211_CCMP_MIC_LEN; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; if (status->flag & RX_FLAG_DECRYPTED) { - if (!pskb_may_pull(rx->skb, hdrlen + CCMP_HDR_LEN)) + if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; } else { if (skb_linearize(rx->skb)) @@ -516,7 +519,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) queue = rx->security_idx; - if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) { + if (memcmp(pn, key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN) <= 0) { key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } @@ -528,19 +531,20 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_aes_ccm_decrypt( key->u.ccmp.tfm, scratch, - skb->data + hdrlen + CCMP_HDR_LEN, data_len, - skb->data + skb->len - CCMP_MIC_LEN, - skb->data + hdrlen + CCMP_HDR_LEN)) + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, + data_len, + skb->data + skb->len - IEEE80211_CCMP_MIC_LEN, + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; } - memcpy(key->u.ccmp.rx_pn[queue], pn, CCMP_PN_LEN); + memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); /* Remove CCMP header and MIC */ - if (pskb_trim(skb, skb->len - CCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - IEEE80211_CCMP_MIC_LEN)) return RX_DROP_UNUSABLE; - memmove(skb->data + CCMP_HDR_LEN, skb->data, hdrlen); - skb_pull(skb, CCMP_HDR_LEN); + memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_CCMP_HDR_LEN); return RX_CONTINUE; } diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig new file mode 100644 index 000000000000..37421db88965 --- /dev/null +++ b/net/mpls/Kconfig @@ -0,0 +1,9 @@ +# +# MPLS configuration +# +config NET_MPLS_GSO + tristate "MPLS: GSO support" + help + This is helper module to allow segmentation of non-MPLS GSO packets + that have had MPLS stack entries pushed onto them and thus + become MPLS GSO packets. diff --git a/net/mpls/Makefile b/net/mpls/Makefile new file mode 100644 index 000000000000..0a3c171be537 --- /dev/null +++ b/net/mpls/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for MPLS. +# +obj-y += mpls_gso.o diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c new file mode 100644 index 000000000000..1bec1219ab81 --- /dev/null +++ b/net/mpls/mpls_gso.c @@ -0,0 +1,108 @@ +/* + * MPLS GSO Support + * + * Authors: Simon Horman (horms@verge.net.au) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on: GSO portions of net/ipv4/gre.c + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/netdev_features.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t mpls_features; + __be16 mpls_protocol; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE | + SKB_GSO_MPLS))) + goto out; + + /* Setup inner SKB. */ + mpls_protocol = skb->protocol; + skb->protocol = skb->inner_protocol; + + /* Push back the mac header that skb_mac_gso_segment() has pulled. + * It will be re-pulled by the call to skb_mac_gso_segment() below + */ + __skb_push(skb, skb->mac_len); + + /* Segment inner packet. */ + mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, mpls_features); + + + /* Restore outer protocol. */ + skb->protocol = mpls_protocol; + + /* Re-pull the mac header that the call to skb_mac_gso_segment() + * above pulled. It will be re-pushed after returning + * skb_mac_gso_segment(), an indirect caller of this function. + */ + __skb_push(skb, skb->data - skb_mac_header(skb)); + +out: + return segs; +} + +static int mpls_gso_send_check(struct sk_buff *skb) +{ + return 0; +} + +static struct packet_offload mpls_mc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_MC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static struct packet_offload mpls_uc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_UC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static int __init mpls_gso_init(void) +{ + pr_info("MPLS GSO support\n"); + + dev_add_offload(&mpls_uc_offload); + dev_add_offload(&mpls_mc_offload); + + return 0; +} + +static void __exit mpls_gso_exit(void) +{ + dev_remove_offload(&mpls_uc_offload); + dev_remove_offload(&mpls_mc_offload); +} + +module_init(mpls_gso_init); +module_exit(mpls_gso_exit); + +MODULE_DESCRIPTION("MPLS GSO support"); +MODULE_AUTHOR("Simon Horman (horms@verge.net.au)"); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 857ca9f35177..2217363ab422 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -304,17 +304,26 @@ static struct pernet_operations netfilter_net_ops = { .exit = netfilter_net_exit, }; -void __init netfilter_init(void) +int __init netfilter_init(void) { - int i, h; + int i, h, ret; + for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } - if (register_pernet_subsys(&netfilter_net_ops) < 0) - panic("cannot create netfilter proc entry"); + ret = register_pernet_subsys(&netfilter_net_ops); + if (ret < 0) + goto err; + + ret = netfilter_log_init(); + if (ret < 0) + goto err_pernet; - if (netfilter_log_init() < 0) - panic("cannot initialize nf_log"); + return 0; +err_pernet: + unregister_pernet_subsys(&netfilter_net_ops); +err: + return ret; } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a083bda322b6..c8c52a98590b 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -975,8 +975,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) return cp; } } - rcu_read_unlock(); - rcu_read_lock(); + cond_resched_rcu(); } return NULL; @@ -1015,8 +1014,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter->l = &ip_vs_conn_tab[idx]; return cp; } - rcu_read_unlock(); - rcu_read_lock(); + cond_resched_rcu(); } iter->l = NULL; return NULL; @@ -1206,17 +1204,13 @@ void ip_vs_random_dropentry(struct net *net) int idx; struct ip_vs_conn *cp, *cp_c; + rcu_read_lock(); /* * Randomly scan 1/32 of the whole table every second */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned int hash = net_random() & ip_vs_conn_tab_mask; - /* - * Lock is actually needed in this loop. - */ - rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ @@ -1252,8 +1246,9 @@ void ip_vs_random_dropentry(struct net *net) __ip_vs_conn_put(cp); } } - rcu_read_unlock(); + cond_resched_rcu(); } + rcu_read_unlock(); } @@ -1267,11 +1262,8 @@ static void ip_vs_conn_flush(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); flush_again: + rcu_read_lock(); for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - /* - * Lock is actually needed in this loop. - */ - rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) @@ -1286,8 +1278,9 @@ flush_again: __ip_vs_conn_put(cp); } } - rcu_read_unlock(); + cond_resched_rcu(); } + rcu_read_unlock(); /* the counter may be not NULL, because maybe some conn entries are run by slow timer handler or unhashed but still referred */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 9e6c2a075a4c..47e510819f54 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1487,9 +1487,9 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) * Currently only NETDEV_DOWN is handled to release refs to cached dsts */ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_service *svc; @@ -1575,7 +1575,7 @@ static int zero; static int three = 3; static int -proc_do_defense_mode(ctl_table *table, int write, +proc_do_defense_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; @@ -1596,7 +1596,7 @@ proc_do_defense_mode(ctl_table *table, int write, } static int -proc_do_sync_threshold(ctl_table *table, int write, +proc_do_sync_threshold(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1616,7 +1616,7 @@ proc_do_sync_threshold(ctl_table *table, int write, } static int -proc_do_sync_mode(ctl_table *table, int write, +proc_do_sync_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1634,7 +1634,7 @@ proc_do_sync_mode(ctl_table *table, int write, } static int -proc_do_sync_ports(ctl_table *table, int write, +proc_do_sync_ports(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1716,9 +1716,9 @@ static struct ctl_table vs_vars[] = { }, { .procname = "sync_qlen_max", - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "sync_sock_size", diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 5ea26bd87743..44595b8ae37f 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -118,7 +118,7 @@ struct ip_vs_lblc_table { * IPVS LBLC sysctl table */ #ifdef CONFIG_SYSCTL -static ctl_table vs_vars_table[] = { +static struct ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", .data = NULL, diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 50123c2ab484..876937db0bf4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -299,7 +299,7 @@ struct ip_vs_lblcr_table { * IPVS LBLCR sysctl table */ -static ctl_table vs_vars_table[] = { +static struct ctl_table vs_vars_table[] = { { .procname = "lblcr_expiration", .data = NULL, diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 6b217074237b..b8a0924064ef 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -55,10 +55,14 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp); EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); -static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); -static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); +static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); +static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); +static int try_eprt(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, - char); + char, unsigned int *); static struct ftp_search { const char *pattern; @@ -66,7 +70,7 @@ static struct ftp_search { char skip; char term; enum nf_ct_ftp_type ftptype; - int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); + int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *); } search[IP_CT_DIR_MAX][2] = { [IP_CT_DIR_ORIGINAL] = { { @@ -90,10 +94,8 @@ static struct ftp_search { { .pattern = "227 ", .plen = sizeof("227 ") - 1, - .skip = '(', - .term = ')', .ftptype = NF_CT_FTP_PASV, - .getnum = try_rfc959, + .getnum = try_rfc1123, }, { .pattern = "229 ", @@ -132,8 +134,9 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], i++; else { /* Unexpected character; true if it's the - terminator and we're finished. */ - if (*data == term && i == array_size - 1) + terminator (or we don't care about one) + and we're finished. */ + if ((*data == term || !term) && i == array_size - 1) return len; pr_debug("Char %u (got %u nums) `%u' unexpected\n", @@ -148,7 +151,8 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ static int try_rfc959(const char *data, size_t dlen, - struct nf_conntrack_man *cmd, char term) + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) { int length; u_int32_t array[6]; @@ -163,6 +167,33 @@ static int try_rfc959(const char *data, size_t dlen, return length; } +/* + * From RFC 1123: + * The format of the 227 reply to a PASV command is not + * well standardized. In particular, an FTP client cannot + * assume that the parentheses shown on page 40 of RFC-959 + * will be present (and in fact, Figure 3 on page 43 omits + * them). Therefore, a User-FTP program that interprets + * the PASV reply must scan the reply for the first digit + * of the host and port numbers. + */ +static int try_rfc1123(const char *data, size_t dlen, + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) +{ + int i; + for (i = 0; i < dlen; i++) + if (isdigit(data[i])) + break; + + if (i == dlen) + return 0; + + *offset += i; + + return try_rfc959(data + i, dlen - i, cmd, 0, offset); +} + /* Grab port: number up to delimiter */ static int get_port(const char *data, int start, size_t dlen, char delim, __be16 *port) @@ -191,7 +222,7 @@ static int get_port(const char *data, int start, size_t dlen, char delim, /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, - char term) + char term, unsigned int *offset) { char delim; int length; @@ -239,7 +270,8 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, /* Returns 0, or length of numbers: |||6446| */ static int try_epsv_response(const char *data, size_t dlen, - struct nf_conntrack_man *cmd, char term) + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) { char delim; @@ -261,9 +293,10 @@ static int find_pattern(const char *data, size_t dlen, unsigned int *numlen, struct nf_conntrack_man *cmd, int (*getnum)(const char *, size_t, - struct nf_conntrack_man *, char)) + struct nf_conntrack_man *, char, + unsigned int *)) { - size_t i; + size_t i = plen; pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); if (dlen == 0) @@ -293,16 +326,18 @@ static int find_pattern(const char *data, size_t dlen, pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip to the 'skip' character */ - for (i = plen; data[i] != skip; i++) - if (i == dlen - 1) return -1; + if (skip) { + for (i = plen; data[i] != skip; i++) + if (i == dlen - 1) return -1; - /* Skip over the last character */ - i++; + /* Skip over the last character */ + i++; + } pr_debug("Skipped up to `%c'!\n", skip); *numoff = i; - *numlen = getnum(data + i, dlen - i, cmd, term); + *numlen = getnum(data + i, dlen - i, cmd, term, numoff); if (!*numlen) return -1; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index bd700b4013c1..f641751dba9d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -408,7 +408,7 @@ static int log_invalid_proto_max = 255; static struct ctl_table_header *nf_ct_netfilter_header; -static ctl_table nf_ct_sysctl_table[] = { +static struct ctl_table nf_ct_sysctl_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, @@ -458,7 +458,7 @@ static ctl_table nf_ct_sysctl_table[] = { #define NET_NF_CONNTRACK_MAX 2089 -static ctl_table nf_ct_netfilter_table[] = { +static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3b18dd1be7d9..85296d4eac0e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -245,7 +245,7 @@ static const struct file_operations nflog_file_ops = { static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static int nf_log_proc_dostring(ctl_table *table, int write, +static int nf_log_proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; @@ -369,9 +369,7 @@ static int __net_init nf_log_net_init(struct net *net) out_sysctl: #ifdef CONFIG_PROC_FS - /* For init_net: errors will trigger panic, don't unroll on error. */ - if (!net_eq(net, &init_net)) - remove_proc_entry("nf_log", net->nf.proc_netfilter); + remove_proc_entry("nf_log", net->nf.proc_netfilter); #endif return ret; } diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 5fea563afe30..85e20a919081 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -104,7 +104,7 @@ static void mangle_contents(struct sk_buff *skb, /* move post-replacement */ memmove(data + match_offset + rep_len, data + match_offset + match_len, - skb->tail - (skb->network_header + dataoff + + skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff + match_offset + match_len)); /* insert data from buffer */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 5352b2d2d5bf..299a48ae5dc9 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -41,6 +41,14 @@ #define NFQNL_QMAX_DEFAULT 1024 +/* We're using struct nlattr which has 16bit nla_len. Note that nla_len + * includes the header length. Thus, the maximum packet length that we + * support is 65531 bytes. We send truncated packets if the specified length + * is larger than that. Userspace can check for presence of NFQA_CAP_LEN + * attribute to detect truncation. + */ +#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN) + struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; @@ -122,7 +130,7 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xffff; + inst->copy_range = NFQNL_MAX_COPY_RANGE; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); @@ -333,10 +341,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, return NULL; data_len = ACCESS_ONCE(queue->copy_range); - if (data_len == 0 || data_len > entskb->len) + if (data_len > entskb->len) data_len = entskb->len; - if (!entskb->head_frag || skb_headlen(entskb) < L1_CACHE_BYTES || skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) @@ -465,7 +472,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; - if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + if (cap_len > data_len && + nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; if (nfqnl_put_packet_info(skb, entskb)) @@ -509,10 +517,6 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, } spin_lock_bh(&queue->lock); - if (!queue->peer_portid) { - err = -EINVAL; - goto err_out_free_nskb; - } if (queue->queue_total >= queue->queue_maxlen) { if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { failopen = 1; @@ -731,13 +735,8 @@ nfqnl_set_mode(struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: queue->copy_mode = mode; - /* We're using struct nlattr which has 16bit nla_len. Note that - * nla_len includes the header length. Thus, the maximum packet - * length that we support is 65531 bytes. We send truncated - * packets if the specified length is larger than that. - */ - if (range > 0xffff - NLA_HDRLEN) - queue->copy_range = 0xffff - NLA_HDRLEN; + if (range == 0 || range > NFQNL_MAX_COPY_RANGE) + queue->copy_range = NFQNL_MAX_COPY_RANGE; else queue->copy_range = range; break; @@ -800,7 +799,7 @@ static int nfqnl_rcv_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index a60261cb0e80..da35ac06a975 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -26,6 +26,9 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (skb->nfct != NULL) return XT_CONTINUE; + /* special case the untracked ct : we want the percpu object */ + if (!ct) + ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); skb->nfct = &ct->ct_general; skb->nfctinfo = IP_CT_NEW; @@ -186,8 +189,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, int ret = -EOPNOTSUPP; if (info->flags & XT_CT_NOTRACK) { - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); + ct = NULL; goto out; } @@ -311,7 +313,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (!nf_ct_is_untracked(ct)) { + if (ct && !nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) module_put(help->helper->me); @@ -319,8 +321,8 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, nf_ct_l3proto_module_put(par->family); xt_ct_destroy_timeout(ct); + nf_ct_put(info->ct); } - nf_ct_put(info->ct); } static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index bd93e51d30ac..292934d23482 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -200,7 +200,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct xt_tee_priv *priv; priv = container_of(this, struct xt_tee_priv, notifier); diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index ed0db15ab00e..7720b036d76a 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -18,7 +18,7 @@ static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; - struct gnet_stats_rate_est *r; + struct gnet_stats_rate_est64 *r; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 63b2bdb59e95..02704245710e 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -107,7 +107,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; - struct sock *sk; + struct sock *sk = skb->sk; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -155,9 +155,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } #endif - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, - saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); - if (sk != NULL) { + if (!sk) + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, + saddr, daddr, sport, dport, + par->in, NFT_LOOKUP_ANY); + if (sk) { bool wildcard; bool transparent = true; @@ -173,7 +175,8 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - xt_socket_put_sk(sk); + if (sk != skb->sk) + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; @@ -260,7 +263,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; - struct sock *sk; + struct sock *sk = skb->sk; struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); int thoff = 0, uninitialized_var(tproto); @@ -291,9 +294,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) return false; } - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, - saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); - if (sk != NULL) { + if (!sk) + sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + saddr, daddr, sport, dport, + par->in, NFT_LOOKUP_ANY); + if (sk) { bool wildcard; bool transparent = true; @@ -309,7 +314,8 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - xt_socket_put_sk(sk); + if (sk != skb->sk) + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 8a6c6ea466d8..af3531926ee0 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -708,7 +708,7 @@ unlhsh_remove_return: * netlbl_unlhsh_netdev_handler - Network device notification handler * @this: notifier block * @event: the event - * @ptr: the network device (cast to void) + * @ptr: the netdevice notifier info (cast to void) * * Description: * Handle network device events, although at present all we care about is a @@ -717,10 +717,9 @@ unlhsh_remove_return: * */ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netlbl_unlhsh_iface *iface = NULL; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 57ee84d21470..275d901d7e46 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -750,6 +750,10 @@ static void netlink_skb_destructor(struct sk_buff *skb) skb->head = NULL; } #endif + if (is_vmalloc_addr(skb->head)) { + vfree(skb->head); + skb->head = NULL; + } if (skb->sk != NULL) sock_rfree(skb); } @@ -854,16 +858,23 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } +static bool netlink_compare(struct net *net, struct sock *sk) +{ + return net_eq(sock_net(sk), net); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_portid_hash *hash = &nl_table[protocol].hash; + struct netlink_table *table = &nl_table[protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; struct sock *sk; read_lock(&nl_table_lock); head = nl_portid_hashfn(hash, portid); sk_for_each(sk, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { + if (table->compare(net, sk) && + (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -976,7 +987,8 @@ netlink_update_listeners(struct sock *sk) static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct netlink_table *table = &nl_table[sk->sk_protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -986,7 +998,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) + if (table->compare(net, osk) && + (nlk_sk(osk)->portid == portid)) break; len++; } @@ -1183,7 +1196,8 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct netlink_table *table = &nl_table[sk->sk_protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; struct sock *osk; s32 portid = task_tgid_vnr(current); @@ -1195,7 +1209,7 @@ retry: netlink_table_grab(); head = nl_portid_hashfn(hash, portid); sk_for_each(osk, head) { - if (!net_eq(sock_net(osk), net)) + if (!table->compare(net, osk)) continue; if (nlk_sk(osk)->portid == portid) { /* Bind collision, search negative portid values. */ @@ -1420,6 +1434,35 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } +static struct sk_buff *netlink_alloc_large_skb(unsigned int size) +{ + struct sk_buff *skb; + void *data; + + if (size <= NLMSG_GOODSIZE) + return alloc_skb(size, GFP_KERNEL); + + skb = alloc_skb_head(GFP_KERNEL); + if (skb == NULL) + return NULL; + + data = vmalloc(size); + if (data == NULL) + goto err; + + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; + skb->len = 0; + skb->destructor = netlink_skb_destructor; + + return skb; +err: + kfree_skb(skb); + return NULL; +} + /* * Attach a skb to a netlink socket. * The caller must hold a reference to the destination socket. On error, the @@ -1510,7 +1553,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) return skb; delta = skb->end - skb->tail; - if (delta * 2 < skb->truesize) + if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; if (skb_shared(skb)) { @@ -2096,7 +2139,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = alloc_skb(len, GFP_KERNEL); + skb = netlink_alloc_large_skb(len); if (skb == NULL) goto out; @@ -2285,6 +2328,8 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (cfg) { nl_table[unit].bind = cfg->bind; nl_table[unit].flags = cfg->flags; + if (cfg->compare) + nl_table[unit].compare = cfg->compare; } nl_table[unit].registered = 1; } else { @@ -2707,6 +2752,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *s; struct nl_seq_iter *iter; + struct net *net; int i, j; ++*pos; @@ -2714,11 +2760,12 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); + net = seq_file_net(seq); iter = seq->private; s = v; do { s = sk_next(s); - } while (s && sock_net(s) != seq_file_net(seq)); + } while (s && !nl_table[s->sk_protocol].compare(net, s)); if (s) return s; @@ -2730,7 +2777,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); - while (s && sock_net(s) != seq_file_net(seq)) + + while (s && !nl_table[s->sk_protocol].compare(net, s)) s = sk_next(s); if (s) { iter->link = i; @@ -2923,6 +2971,8 @@ static int __init netlink_proto_init(void) hash->shift = 0; hash->mask = 0; hash->rehash_time = jiffies; + + nl_table[i].compare = netlink_compare; } netlink_add_usersock_entry(); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed8522265f4e..eaa88d187cdc 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -73,6 +73,7 @@ struct netlink_table { struct mutex *cb_mutex; struct module *module; void (*bind)(int group); + bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec0c80fde69f..698814bfa7ad 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -117,7 +117,7 @@ static void nr_kill_by_device(struct net_device *dev) */ static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 42f630b9a698..ba1c368b3f18 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -34,7 +34,7 @@ static int min_reset[] = {0}, max_reset[] = {1}; static struct ctl_table_header *nr_table_header; -static ctl_table nr_table[] = { +static struct ctl_table nr_table[] = { { .procname = "default_path_quality", .data = &sysctl_netrom_default_path_quality, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 894b6cbdd929..596d6373399d 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -130,9 +130,13 @@ static int set_eth_addr(struct sk_buff *skb, if (unlikely(err)) return err; + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); + ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + return 0; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d12d6b8b5e8b..0f783d9fa00d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -739,10 +739,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, - &flow->key.phy.skb_mark, - &flow->key.phy.in_port, - a[OVS_PACKET_ATTR_KEY]); + err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; @@ -1812,10 +1809,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - err = 0; if (a[OVS_VPORT_ATTR_TYPE] && - nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) + nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; + goto exit_unlock; + } reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!reply) { @@ -1823,10 +1821,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - if (!err && a[OVS_VPORT_ATTR_OPTIONS]) + if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); - if (err) - goto exit_free; + if (err) + goto exit_free; + } if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index ef4feec6cd84..c3235675f359 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -78,7 +78,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct ovs_net *ovs_net; - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vport *vport = NULL; if (!ovs_is_internal_dev(dev)) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b15321a2228c..093c191d4fc2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -590,10 +590,10 @@ out: * - skb->network_header: just past the Ethernet header, or just past the * VLAN header, to the first byte of the Ethernet payload. * - * - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6 + * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 * on output, then just past the IP header, if one is present and * of a correct length, otherwise the same as skb->network_header. - * For other key->dl_type values it is left untouched. + * For other key->eth.type values it is left untouched. */ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, int *key_lenp) @@ -618,6 +618,9 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ if (vlan_tx_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); @@ -1122,10 +1125,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, /** * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. - * @priority: receives the skb priority - * @mark: receives the skb mark - * @in_port: receives the extracted input port. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. * * This parses a series of Netlink attributes that form a flow key, which must @@ -1133,15 +1134,15 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, - const struct nlattr *attr) +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, + const struct nlattr *attr) { const struct nlattr *nla; int rem; - *in_port = DP_MAX_PORTS; - *priority = 0; - *mark = 0; + flow->key.phy.in_port = DP_MAX_PORTS; + flow->key.phy.priority = 0; + flow->key.phy.skb_mark = 0; nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); @@ -1152,17 +1153,17 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, switch (type) { case OVS_KEY_ATTR_PRIORITY: - *priority = nla_get_u32(nla); + flow->key.phy.priority = nla_get_u32(nla); break; case OVS_KEY_ATTR_IN_PORT: if (nla_get_u32(nla) >= DP_MAX_PORTS) return -EINVAL; - *in_port = nla_get_u32(nla); + flow->key.phy.in_port = nla_get_u32(nla); break; case OVS_KEY_ATTR_SKB_MARK: - *mark = nla_get_u32(nla); + flow->key.phy.skb_mark = nla_get_u32(nla); break; } } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 0875fde65b9c..2a83e2141f08 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -141,8 +141,8 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, - const struct nlattr *); +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, + const struct nlattr *attr); #define MAX_ACTIONS_BUFSIZE (16 * 1024) #define TBL_MIN_BUCKETS 1024 diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 84e0a0379186..e284c7e1fec4 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -221,6 +221,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); netif_rx(skb); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 4f01c6d2ffa4..40de815b4213 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -49,6 +49,8 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) return; skb_push(skb, ETH_HLEN); + ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + ovs_vport_receive(vport, skb); return; @@ -170,7 +172,7 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", netdev_vport->dev->name, packet_length(skb), mtu); - goto error; + goto drop; } skb->dev = netdev_vport->dev; @@ -179,9 +181,8 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) return len; -error: +drop: kfree_skb(skb); - ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); return 0; } diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index a3cb3a32cd77..dd298b5c5cdb 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -39,6 +39,5 @@ netdev_vport_priv(const struct vport *vport) } const char *ovs_netdev_get_name(const struct vport *); -const char *ovs_netdev_get_config(const struct vport *); #endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 720623190eaa..176d449351eb 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -351,7 +351,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) { int sent = vport->ops->send(vport, skb); - if (likely(sent)) { + if (likely(sent > 0)) { struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); @@ -360,7 +360,12 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) stats->tx_packets++; stats->tx_bytes += sent; u64_stats_update_end(&stats->syncp); - } + } else if (sent < 0) { + ovs_vport_record_error(vport, VPORT_E_TX_ERROR); + kfree_skb(skb); + } else + ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); + return sent; } @@ -371,7 +376,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) * @err_type: one of enum vport_err_type types to indicate the error type * * If using the vport generic stats layer indicate that an error of the given - * type has occured. + * type has occurred. */ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) { diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 68a377bc0841..293278c4c2df 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -123,9 +123,8 @@ struct vport_parms { * existing vport to a &struct sk_buff. May be %NULL for a vport that does not * have any configuration. * @get_name: Get the device's name. - * @get_config: Get the device's configuration. - * May be null if the device does not have an ifindex. - * @send: Send a packet on the device. Returns the length of the packet sent. + * @send: Send a packet on the device. Returns the length of the packet sent, + * zero for dropped packets or negative for error. */ struct vport_ops { enum ovs_vport_type type; @@ -139,7 +138,6 @@ struct vport_ops { /* Called with rcu_read_lock or ovs_mutex. */ const char *(*get_name)(const struct vport *); - void (*get_config)(const struct vport *, void *); int (*send)(struct vport *, struct sk_buff *); }; @@ -194,4 +192,11 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; +static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); +} + #endif /* vport.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 20a1bd0e6549..4b66c752eae5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3330,10 +3330,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } -static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) +static int packet_notifier(struct notifier_block *this, + unsigned long msg, void *ptr) { struct sock *sk; - struct net_device *dev = data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 45a7df6575de..56a6146ac94b 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -292,9 +292,9 @@ static void phonet_route_autodel(struct net_device *dev) /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (what) { case NETDEV_REGISTER: diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index d6bbbbd0af18..c02a8c4bc11f 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -61,13 +61,13 @@ void phonet_get_local_port_range(int *min, int *max) } while (read_seqretry(&local_port_range_lock, seq)); } -static int proc_local_port_range(ctl_table *table, int write, +static int proc_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2] = {local_port_range[0], local_port_range[1]}; - ctl_table tmp = { + struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index 7e643bafb4af..e4e41b3afce7 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -61,7 +61,7 @@ static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; */ unsigned int rds_ib_sysctl_flow_control = 0; -static ctl_table rds_ib_sysctl_table[] = { +static struct ctl_table rds_ib_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_ib_sysctl_max_send_wr, diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index 5d5ebd576f3f..89c91515ed0c 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -55,7 +55,7 @@ static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL; unsigned int rds_iw_sysctl_flow_control = 1; -static ctl_table rds_iw_sysctl_table[] = { +static struct ctl_table rds_iw_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_iw_sysctl_max_send_wr, diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index 907214b4c4d0..b5cb2aa08f33 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -49,7 +49,7 @@ unsigned int rds_sysctl_max_unacked_bytes = (16 << 20); unsigned int rds_sysctl_ping_enable = 1; -static ctl_table rds_sysctl_rds_table[] = { +static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "reconnect_min_delay_ms", .data = &rds_sysctl_reconnect_min_jiffies, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9c8347451597..e98fcfbe6007 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -202,10 +202,10 @@ static void rose_kill_by_device(struct net_device *dev) /* * Handle device status changes. */ -static int rose_device_event(struct notifier_block *this, unsigned long event, - void *ptr) +static int rose_device_event(struct notifier_block *this, + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 94ca9c2ccd69..89a9278795a9 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -24,7 +24,7 @@ static int min_window[] = {1}, max_window[] = {7}; static struct ctl_table_header *rose_table_header; -static ctl_table rose_table[] = { +static struct ctl_table rose_table[] = { { .procname = "restart_request_timeout", .data = &sysctl_rose_restart_request_timeout, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5d676edc22a6..977c10e0631b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -243,7 +243,7 @@ nla_put_failure: static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; if (event == NETDEV_UNREGISTER) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 1bc210ffcba2..71a568862557 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -130,7 +130,7 @@ struct cbq_class { psched_time_t penalized; struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 759b308d1a8d..8302717ea303 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -25,7 +25,7 @@ struct drr_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct list_head alist; struct Qdisc *qdisc; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 20224086cc28..4626cef4b76e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -901,37 +901,33 @@ void dev_shutdown(struct net_device *dev) void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf) { - u64 factor; - u64 mult; - int shift; - memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; - r->rate_bps = (u64)conf->rate << 3; + r->rate_bytes_ps = conf->rate; r->mult = 1; /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. + * The deal here is to replace a divide by a reciprocal one + * in fast path (a reciprocal divide is a multiply and a shift) + * + * Normal formula would be : + * time_in_ns = (NSEC_PER_SEC * len) / rate_bps + * + * We compute mult/shift to use instead : + * time_in_ns = (len * mult) >> shift; + * + * We try to get the highest possible mult value for accuracy, + * but have to make sure no overflows will ever happen. */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. - */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) + if (r->rate_bytes_ps > 0) { + u64 factor = NSEC_PER_SEC; + + for (;;) { + r->mult = div64_u64(factor, r->rate_bytes_ps); + if (r->mult & (1U << 31) || factor & (1ULL << 63)) break; + factor <<= 1; + r->shift++; } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); } } EXPORT_SYMBOL(psched_ratecfg_precompute); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9facea03faeb..c4075610502c 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -114,7 +114,7 @@ struct hfsc_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adaedd79389c..7954e73d118a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -65,6 +65,10 @@ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis f module_param (htb_hysteresis, int, 0640); MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate"); +static int htb_rate_est = 0; /* htb classes have a default rate estimator */ +module_param(htb_rate_est, int, 0640); +MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes"); + /* used internaly to keep status of single class */ enum htb_cmode { HTB_CANT_SEND, /* class can't send and can't borrow */ @@ -72,23 +76,39 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; -/* interior & leaf nodes; props specific to leaves are marked L: */ +/* interior & leaf nodes; props specific to leaves are marked L: + * To reduce false sharing, place mostly read fields at beginning, + * and mostly written ones at the end. + */ struct htb_class { struct Qdisc_class_common common; - /* general class parameters */ - struct gnet_stats_basic_packed bstats; - struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; - struct tc_htb_xstats xstats; /* our special stats */ - int refcnt; /* usage count of this class */ + struct psched_ratecfg rate; + struct psched_ratecfg ceil; + s64 buffer, cbuffer;/* token bucket depth/rate */ + s64 mbuffer; /* max wait time */ + int prio; /* these two are used only by leaves... */ + int quantum; /* but stored for parent-to-leaf return */ + + struct tcf_proto *filter_list; /* class attached filters */ + int filter_cnt; + int refcnt; /* usage count of this class */ - /* topology */ - int level; /* our level (see above) */ - unsigned int children; - struct htb_class *parent; /* parent class */ + int level; /* our level (see above) */ + unsigned int children; + struct htb_class *parent; /* parent class */ + + struct gnet_stats_rate_est64 rate_est; + + /* + * Written often fields + */ + struct gnet_stats_basic_packed bstats; + struct gnet_stats_queue qstats; + struct tc_htb_xstats xstats; /* our special stats */ - int prio; /* these two are used only by leaves... */ - int quantum; /* but stored for parent-to-leaf return */ + /* token bucket parameters */ + s64 tokens, ctokens;/* current number of tokens */ + s64 t_c; /* checkpoint time */ union { struct htb_class_leaf { @@ -107,24 +127,12 @@ struct htb_class { u32 last_ptr_id[TC_HTB_NUMPRIO]; } inner; } un; - struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ - struct rb_node pq_node; /* node for event queue */ - s64 pq_key; - - int prio_activity; /* for which prios are we active */ - enum htb_cmode cmode; /* current mode of the class */ - - /* class attached filters */ - struct tcf_proto *filter_list; - int filter_cnt; + s64 pq_key; - /* token bucket parameters */ - struct psched_ratecfg rate; - struct psched_ratecfg ceil; - s64 buffer, cbuffer; /* token bucket depth/rate */ - s64 mbuffer; /* max wait time */ - s64 tokens, ctokens; /* current number of tokens */ - s64 t_c; /* checkpoint time */ + int prio_activity; /* for which prios are we active */ + enum htb_cmode cmode; /* current mode of the class */ + struct rb_node pq_node; /* node for event queue */ + struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ }; struct htb_sched { @@ -1366,12 +1374,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - err = gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE] ? : &est.nla); - if (err) { - kfree(cl); - goto failure; + if (htb_rate_est || tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE] ? : &est.nla); + if (err) { + kfree(cl); + goto failure; + } } cl->refcnt = 1; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d51852bba01c..7c195d972bf0 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -138,7 +138,7 @@ struct qfq_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct Qdisc *qdisc; struct list_head alist; /* Link for active-classes list. */ struct qfq_aggregate *agg; /* Parent aggregate. */ diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index e478d316602b..1aaf1b6e51a2 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -116,14 +116,57 @@ struct tbf_sched_data { struct qdisc_watchdog watchdog; /* Watchdog timer */ }; + +/* GSO packet is too big, segment it so that tbf can transmit + * each segment in time + */ +static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct tbf_sched_data *q = qdisc_priv(sch); + struct sk_buff *segs, *nskb; + netdev_features_t features = netif_skb_features(skb); + int ret, nb; + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) + return qdisc_reshape_fail(skb, sch); + + nb = 0; + while (segs) { + nskb = segs->next; + segs->next = NULL; + if (likely(segs->len <= q->max_size)) { + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); + } else { + ret = qdisc_reshape_fail(skb, sch); + } + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + } else { + nb++; + } + segs = nskb; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_decrease_qlen(sch, 1 - nb); + consume_skb(skb); + return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; +} + static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (qdisc_pkt_len(skb) > q->max_size) + if (qdisc_pkt_len(skb) > q->max_size) { + if (skb_is_gso(skb)) + return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); - + } ret = qdisc_enqueue(skb, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 91cfd8f94a19..bf6e6bd553c0 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -86,10 +86,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; - sctp_endpoint_hold(asoc->ep); - - /* Hold the sock. */ asoc->base.sk = (struct sock *)sk; + + sctp_endpoint_hold(asoc->ep); sock_hold(asoc->base.sk); /* Initialize the common base substructure. */ @@ -343,8 +342,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; fail_init: - sctp_endpoint_put(asoc->ep); sock_put(asoc->base.sk); + sctp_endpoint_put(asoc->ep); return NULL; } @@ -356,7 +355,7 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, { struct sctp_association *asoc; - asoc = t_new(struct sctp_association, gfp); + asoc = kzalloc(sizeof(*asoc), gfp); if (!asoc) goto fail; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 41145fe31813..64977ea0f9c5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -162,7 +162,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, struct sctp_sockaddr_entry *addr; /* Add the address to the bind address list. */ - addr = t_new(struct sctp_sockaddr_entry, gfp); + addr = kzalloc(sizeof(*addr), gfp); if (!addr) return -ENOMEM; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 69ce21e3716f..7135fc0c087a 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -66,7 +66,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) } /* Allocate and initialize datamsg. */ -SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) +static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) { struct sctp_datamsg *msg; msg = kmalloc(sizeof(struct sctp_datamsg), gfp); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 5fbd7bc6bb11..a8b26741c0af 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -192,9 +192,10 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) struct sctp_endpoint *ep; /* Build a local endpoint. */ - ep = t_new(struct sctp_endpoint, gfp); + ep = kzalloc(sizeof(*ep), gfp); if (!ep) goto fail; + if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; diff --git a/net/sctp/input.c b/net/sctp/input.c index 4b2c83146aa7..4cfc74699a3f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct sctp_association *asoc = NULL; struct sctp_transport *transport; struct inet_sock *inet; - sk_buff_data_t saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); @@ -903,11 +903,11 @@ hit: } /* Look up an association. BH-safe. */ -SCTP_STATIC +static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp) { struct sctp_association *asoc; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 391a245d5203..adeaa0e64f52 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -145,15 +145,15 @@ static struct notifier_block sctp_inet6addr_notifier = { }; /* ICMP error handler. */ -SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct inet6_dev *idev; struct sock *sk; struct sctp_association *asoc; struct sctp_transport *transport; struct ipv6_pinfo *np; - sk_buff_data_t saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); @@ -402,7 +402,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. */ - addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 4e45ee35d0db..0c83162a6bf8 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -134,9 +134,15 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo struct sctp_af *af; if (epb->type == SCTP_EP_TYPE_ASSOCIATION) { - asoc = sctp_assoc(epb); - peer = asoc->peer.primary_path; - primary = &peer->saddr; + asoc = sctp_assoc(epb); + + peer = asoc->peer.primary_path; + if (unlikely(peer == NULL)) { + WARN(1, "Association %p with NULL primary path!", asoc); + return; + } + + primary = &peer->saddr; } rcu_read_lock(); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index eaee00c61139..57b568c38ef6 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -153,7 +153,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { /* Add the address to the local list. */ - addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; @@ -1312,7 +1312,7 @@ static struct pernet_operations sctp_net_ops = { }; /* Initialize the universe into something sensible. */ -SCTP_STATIC __init int sctp_init(void) +static __init int sctp_init(void) { int i; int status = -EINVAL; @@ -1499,7 +1499,7 @@ err_chunk_cachep: } /* Exit handler for the SCTP protocol. */ -SCTP_STATIC __exit void sctp_exit(void) +static __exit void sctp_exit(void) { /* BUG. This should probably do something useful like clean * up all the remaining associations and all that memory. diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index cf579e71cff0..fc8548743ed5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -68,9 +68,8 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -SCTP_STATIC -struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen); +static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, + __u8 type, __u8 flags, int paylen); static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, @@ -1353,9 +1352,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk) /* Create a new chunk, setting the type and flags headers from the * arguments, reserving enough space for a 'paylen' byte payload. */ -SCTP_STATIC -struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen) +static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, + __u8 type, __u8 flags, int paylen) { struct sctp_chunk *retval; sctp_chunkhdr_t *chunk_hdr; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8aab894aeabe..ff91f47b0239 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -864,6 +864,7 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; + BUG_ON(asoc->peer.primary_path == NULL); sctp_unhash_established(asoc); sctp_association_free(asoc); } @@ -1274,8 +1275,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_uncork(&asoc->outqueue); local_cork = 0; } - asoc = cmd->obj.asoc; + /* Register with the endpoint. */ + asoc = cmd->obj.asoc; + BUG_ON(asoc->peer.primary_path == NULL); sctp_endpoint_add_asoc(ep, asoc); sctp_hash_established(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index de1a0138317f..b3d186856513 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4632,16 +4632,16 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, if (!repl) goto nomem; + /* Choose transport for INIT. */ + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); + /* Cast away the const modifier, as we want to just * rerun it through as a sideffect. */ my_asoc = (struct sctp_association *)asoc; sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(my_asoc)); - /* Choose transport for INIT. */ - sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, - SCTP_CHUNK(repl)); - /* After sending the INIT, "A" starts the T1-init timer and * enters the COOKIE-WAIT state. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6abb1caf9836..32db19ba4a21 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -84,11 +84,6 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -/* WARNING: Please do not remove the SCTP_STATIC attribute to - * any of the functions below as they are used to export functions - * used by a project regression testsuite. - */ - /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); @@ -279,7 +274,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, * sockaddr_in6 [RFC 2553]), * addr_len - the size of the address structure. */ -SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) +static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) { int retval = 0; @@ -333,7 +328,7 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, } /* Bind a local address either to an endpoint or to an association. */ -SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) +static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) { struct net *net = sock_net(sk); struct sctp_sock *sp = sctp_sk(sk); @@ -964,9 +959,9 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * * Returns 0 if ok, <0 errno code on error. */ -SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size, int op) +static int sctp_setsockopt_bindx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size, int op) { struct sockaddr *kaddrs; int err; @@ -1312,7 +1307,7 @@ out_free: * * Returns >=0 if ok, <0 errno code on error. */ -SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, +static int __sctp_setsockopt_connectx(struct sock* sk, struct sockaddr __user *addrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1350,9 +1345,9 @@ SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, * This is an older interface. It's kept for backward compatibility * to the option that doesn't provide association id. */ -SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size) +static int sctp_setsockopt_connectx_old(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) { return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL); } @@ -1363,9 +1358,9 @@ SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, * indication to the call. Error is always negative and association id is * always positive. */ -SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size) +static int sctp_setsockopt_connectx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) { sctp_assoc_t assoc_id = 0; int err = 0; @@ -1386,9 +1381,9 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, * addrs_num structure member. That way we can re-use the existing * code. */ -SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, - char __user *optval, - int __user *optlen) +static int sctp_getsockopt_connectx3(struct sock* sk, int len, + char __user *optval, + int __user *optlen) { struct sctp_getaddrs_old param; sctp_assoc_t assoc_id = 0; @@ -1464,7 +1459,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, * shutdown phase does not finish during this period, close() will * return but the graceful shutdown phase continues in the system. */ -SCTP_STATIC void sctp_close(struct sock *sk, long timeout) +static void sctp_close(struct sock *sk, long timeout) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; @@ -1573,10 +1568,10 @@ static int sctp_error(struct sock *sk, int flags, int err) */ /* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */ -SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); +static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); -SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t msg_len) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -2034,9 +2029,9 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) */ static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); -SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); @@ -3565,8 +3560,8 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, * optval - the buffer to store the value of the option. * optlen - the size of the buffer. */ -SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +static int sctp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) { int retval = 0; @@ -3725,8 +3720,8 @@ out_nounlock: * * len: the size of the address. */ -SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) +static int sctp_connect(struct sock *sk, struct sockaddr *addr, + int addr_len) { int err = 0; struct sctp_af *af; @@ -3752,7 +3747,7 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* FIXME: Write comments. */ -SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) +static int sctp_disconnect(struct sock *sk, int flags) { return -EOPNOTSUPP; /* STUB */ } @@ -3764,7 +3759,7 @@ SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. */ -SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) +static struct sock *sctp_accept(struct sock *sk, int flags, int *err) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -3817,7 +3812,7 @@ out: } /* The SCTP ioctl handler. */ -SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) { int rc = -ENOTCONN; @@ -3859,10 +3854,9 @@ out: * initialized the SCTP-specific portion of the sock. * The sock structure should already be zero-filled memory. */ -SCTP_STATIC int sctp_init_sock(struct sock *sk) +static int sctp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); - struct sctp_endpoint *ep; struct sctp_sock *sp; SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); @@ -3971,11 +3965,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) * change the data structure relationships, this may still * be useful for storing pre-connect address information. */ - ep = sctp_endpoint_new(sk, GFP_KERNEL); - if (!ep) + sp->ep = sctp_endpoint_new(sk, GFP_KERNEL); + if (!sp->ep) return -ENOMEM; - sp->ep = ep; sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); @@ -3995,7 +3988,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) } /* Cleanup any SCTP per socket resources. */ -SCTP_STATIC void sctp_destroy_sock(struct sock *sk) +static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; @@ -4036,7 +4029,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) * Disables further send and receive operations * and initiates the SCTP shutdown sequence. */ -SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) +static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; @@ -5708,8 +5701,8 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, return 0; } -SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int sctp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { int retval = 0; int len; @@ -6054,7 +6047,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) /* * Move a socket to LISTENING state. */ -SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) +static int sctp_listen_start(struct sock *sk, int backlog) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; @@ -6341,8 +6334,7 @@ static int sctp_autobind(struct sock *sk) * msg_control * points here */ -SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg, - sctp_cmsgs_t *cmsgs) +static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) { struct cmsghdr *cmsg; struct msghdr *my_msg = (struct msghdr *)msg; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index bf3c6e8fc401..9a5c4c9eddaf 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -62,12 +62,12 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; -static int proc_sctp_do_hmac_alg(ctl_table *ctl, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static ctl_table sctp_table[] = { +static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", .data = &sysctl_sctp_mem, @@ -93,7 +93,7 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; -static ctl_table sctp_net_table[] = { +static struct ctl_table sctp_net_table[] = { { .procname = "rto_initial", .data = &init_net.sctp.rto_initial, @@ -300,14 +300,14 @@ static ctl_table sctp_net_table[] = { { /* sentinel */ } }; -static int proc_sctp_do_hmac_alg(ctl_table *ctl, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; char tmp[8]; - ctl_table tbl; + struct ctl_table tbl; int ret; int changed = 0; char *none = "none"; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 098f1d5f769e..5d3c71bbd197 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -116,7 +116,7 @@ struct sctp_transport *sctp_transport_new(struct net *net, { struct sctp_transport *transport; - transport = t_new(struct sctp_transport, gfp); + transport = kzalloc(sizeof(*transport), gfp); if (!transport) goto fail; diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 396c45174e5b..b46019568a86 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -161,8 +161,8 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, /* Initialize a Gap Ack Block iterator from memory being provided. */ -SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, - struct sctp_tsnmap_iter *iter) +static void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, + struct sctp_tsnmap_iter *iter) { /* Only start looking one past the Cumulative TSN Ack Point. */ iter->start = map->cumulative_tsn_ack_point + 1; @@ -171,9 +171,9 @@ SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, /* Get the next Gap Ack Blocks. Returns 0 if there was not another block * to get. */ -SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, - struct sctp_tsnmap_iter *iter, - __u16 *start, __u16 *end) +static int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, + struct sctp_tsnmap_iter *iter, + __u16 *start, __u16 *end) { int ended = 0; __u16 start_ = 0, end_ = 0, offset; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 10c018a5b9fe..44a45dbee4df 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -57,9 +57,9 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); /* Initialize an ULP event from an given skb. */ -SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, - int msg_flags, - unsigned int len) +static void sctp_ulpevent_init(struct sctp_ulpevent *event, + int msg_flags, + unsigned int len) { memset(event, 0, sizeof(struct sctp_ulpevent)); event->msg_flags = msg_flags; @@ -67,8 +67,8 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, } /* Create a new sctp_ulpevent. */ -SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, - gfp_t gfp) +static struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, + gfp_t gfp) { struct sctp_ulpevent *event; struct sk_buff *skb; diff --git a/net/socket.c b/net/socket.c index 4ca1526db756..3eec3f76b49c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,6 +104,11 @@ #include <linux/route.h> #include <linux/sockios.h> #include <linux/atalk.h> +#include <net/ll_poll.h> + +#ifdef CONFIG_NET_LL_RX_POLL +unsigned int sysctl_net_ll_poll __read_mostly; +#endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, @@ -2635,7 +2640,9 @@ static int __init sock_init(void) */ #ifdef CONFIG_NETFILTER - netfilter_init(); + err = netfilter_init(); + if (err) + goto out; #endif #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index af7d339add9d..c99c58e2ee66 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(nlm_debug); #ifdef RPC_DEBUG static struct ctl_table_header *sunrpc_table_header; -static ctl_table sunrpc_table[]; +static struct ctl_table sunrpc_table[]; void rpc_register_sysctl(void) @@ -58,7 +58,7 @@ rpc_unregister_sysctl(void) } } -static int proc_do_xprt(ctl_table *table, int write, +static int proc_do_xprt(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; @@ -73,7 +73,7 @@ static int proc_do_xprt(ctl_table *table, int write, } static int -proc_dodebug(ctl_table *table, int write, +proc_dodebug(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], c, *s; @@ -135,7 +135,7 @@ done: } -static ctl_table debug_table[] = { +static struct ctl_table debug_table[] = { { .procname = "rpc_debug", .data = &rpc_debug, @@ -173,7 +173,7 @@ static ctl_table debug_table[] = { { } }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 8343737e85f4..c1b6270262c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -84,7 +84,7 @@ struct workqueue_struct *svc_rdma_wq; * resets the associated statistic to zero. Any read returns it's * current value. */ -static int read_reset_stat(ctl_table *table, int write, +static int read_reset_stat(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -119,7 +119,7 @@ static int read_reset_stat(ctl_table *table, int write, } static struct ctl_table_header *svcrdma_table_header; -static ctl_table svcrdma_parm_table[] = { +static struct ctl_table svcrdma_parm_table[] = { { .procname = "max_requests", .data = &svcrdma_max_requests, @@ -214,7 +214,7 @@ static ctl_table svcrdma_parm_table[] = { { }, }; -static ctl_table svcrdma_table[] = { +static struct ctl_table svcrdma_table[] = { { .procname = "svc_rdma", .mode = 0555, @@ -223,7 +223,7 @@ static ctl_table svcrdma_table[] = { { }, }; -static ctl_table svcrdma_root_table[] = { +static struct ctl_table svcrdma_root_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 794312f22b9b..285dc0884115 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -86,7 +86,7 @@ static unsigned int max_memreg = RPCRDMA_LAST - 1; static struct ctl_table_header *sunrpc_table_header; -static ctl_table xr_tunables_table[] = { +static struct ctl_table xr_tunables_table[] = { { .procname = "rdma_slot_table_entries", .data = &xprt_rdma_slot_table_entries, @@ -138,7 +138,7 @@ static ctl_table xr_tunables_table[] = { { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ffd50348a509..412de7cfcc80 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -87,7 +87,7 @@ static struct ctl_table_header *sunrpc_table_header; * FIXME: changing the UDP slot table size should also resize the UDP * socket buffers for existing UDP transports */ -static ctl_table xs_tunables_table[] = { +static struct ctl_table xs_tunables_table[] = { { .procname = "udp_slot_table_entries", .data = &xprt_udp_slot_table_entries, @@ -143,7 +143,7 @@ static ctl_table xs_tunables_table[] = { { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 4df8e02d9008..b282f7130d2b 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -8,6 +8,7 @@ tipc-y += addr.o bcast.o bearer.o config.o \ core.o handler.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ - socket.o log.o eth_media.o + socket.o log.o eth_media.o server.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o +tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e5f3da507823..716de1ac6cb5 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -578,8 +578,7 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all circumstances, * since the broadcast link's pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, - struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a93306557e00..6ee587b469fd 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -75,7 +75,8 @@ void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); /** * tipc_nmap_equal - test for equality of node maps */ -static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b) +static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b) { return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } diff --git a/net/tipc/config.c b/net/tipc/config.c index f67866c765dd..c301a9a592d8 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2012, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,12 +38,12 @@ #include "port.h" #include "name_table.h" #include "config.h" +#include "server.h" #define REPLY_TRUNCATED "<truncated>\n" -static u32 config_port_ref; - -static DEFINE_SPINLOCK(config_lock); +static DEFINE_MUTEX(config_mutex); +static struct tipc_server cfgsrv; static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ @@ -181,18 +181,7 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - - /* - * Must temporarily release configuration spinlock while switching into - * networking mode as it calls tipc_eth_media_start(), which may sleep. - * Releasing the lock is harmless as other locally-issued configuration - * commands won't occur until this one completes, and remotely-issued - * configuration commands can't be received until a local configuration - * command to enable the first bearer is received and processed. - */ - spin_unlock_bh(&config_lock); tipc_core_start_net(addr); - spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); } @@ -248,7 +237,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area { struct sk_buff *rep_tlv_buf; - spin_lock_bh(&config_lock); + mutex_lock(&config_mutex); /* Save request and reply details in a well-known location */ req_tlv_area = request_area; @@ -377,37 +366,31 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Return reply buffer */ exit: - spin_unlock_bh(&config_lock); + mutex_unlock(&config_mutex); return rep_tlv_buf; } -static void cfg_named_msg_event(void *userdata, - u32 port_ref, - struct sk_buff **buf, - const unchar *msg, - u32 size, - u32 importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest) +static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len) { struct tipc_cfg_msg_hdr *req_hdr; struct tipc_cfg_msg_hdr *rep_hdr; struct sk_buff *rep_buf; + int ret; /* Validate configuration message header (ignore invalid message) */ - req_hdr = (struct tipc_cfg_msg_hdr *)msg; - if ((size < sizeof(*req_hdr)) || - (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || + req_hdr = (struct tipc_cfg_msg_hdr *)buf; + if ((len < sizeof(*req_hdr)) || + (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { pr_warn("Invalid configuration message discarded\n"); return; } /* Generate reply for request (if can't, return request) */ - rep_buf = tipc_cfg_do_cmd(orig->node, - ntohs(req_hdr->tcm_type), - msg + sizeof(*req_hdr), - size - sizeof(*req_hdr), + rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type), + buf + sizeof(*req_hdr), + len - sizeof(*req_hdr), BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr)); if (rep_buf) { skb_push(rep_buf, sizeof(*rep_hdr)); @@ -415,57 +398,51 @@ static void cfg_named_msg_event(void *userdata, memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); rep_hdr->tcm_len = htonl(rep_buf->len); rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - } else { - rep_buf = *buf; - *buf = NULL; - } - /* NEED TO ADD CODE TO HANDLE FAILED SEND (SUCH AS CONGESTION) */ - tipc_send_buf2port(port_ref, orig, rep_buf, rep_buf->len); + ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, + rep_buf->len); + if (ret < 0) + pr_err("Sending cfg reply message failed, no memory\n"); + + kfree_skb(rep_buf); + } } +static struct sockaddr_tipc cfgsrv_addr __read_mostly = { + .family = AF_TIPC, + .addrtype = TIPC_ADDR_NAMESEQ, + .addr.nameseq.type = TIPC_CFG_SRV, + .addr.nameseq.lower = 0, + .addr.nameseq.upper = 0, + .scope = TIPC_ZONE_SCOPE +}; + +static struct tipc_server cfgsrv __read_mostly = { + .saddr = &cfgsrv_addr, + .imp = TIPC_CRITICAL_IMPORTANCE, + .type = SOCK_RDM, + .max_rcvbuf_size = 64 * 1024, + .name = "cfg_server", + .tipc_conn_recvmsg = cfg_conn_msg_event, + .tipc_conn_new = NULL, + .tipc_conn_shutdown = NULL +}; + int tipc_cfg_init(void) { - struct tipc_name_seq seq; - int res; - - res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE, - NULL, NULL, NULL, - NULL, cfg_named_msg_event, NULL, - NULL, &config_port_ref); - if (res) - goto failed; - - seq.type = TIPC_CFG_SRV; - seq.lower = seq.upper = tipc_own_addr; - res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); - if (res) - goto failed; - - return 0; - -failed: - pr_err("Unable to create configuration service\n"); - return res; + return tipc_server_start(&cfgsrv); } void tipc_cfg_reinit(void) { - struct tipc_name_seq seq; - int res; - - seq.type = TIPC_CFG_SRV; - seq.lower = seq.upper = 0; - tipc_withdraw(config_port_ref, TIPC_ZONE_SCOPE, &seq); + tipc_server_stop(&cfgsrv); - seq.lower = seq.upper = tipc_own_addr; - res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); - if (res) - pr_err("Unable to reinitialize configuration service\n"); + cfgsrv_addr.addr.nameseq.lower = tipc_own_addr; + cfgsrv_addr.addr.nameseq.upper = tipc_own_addr; + tipc_server_start(&cfgsrv); } void tipc_cfg_stop(void) { - tipc_deleteport(config_port_ref); - config_port_ref = 0; + tipc_server_stop(&cfgsrv); } diff --git a/net/tipc/core.c b/net/tipc/core.c index 7ec2c1eb94f1..fd4eeeaa972a 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -2,7 +2,7 @@ * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2006, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,6 +39,7 @@ #include "name_table.h" #include "subscr.h" #include "config.h" +#include "port.h" #include <linux/module.h> @@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly; int tipc_max_ports __read_mostly; int tipc_net_id __read_mostly; int tipc_remote_management __read_mostly; - +int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ /** * tipc_buf_acquire - creates a TIPC message buffer @@ -118,6 +119,7 @@ static void tipc_core_stop(void) tipc_nametbl_stop(); tipc_ref_table_stop(); tipc_socket_stop(); + tipc_unregister_sysctl(); } /** @@ -135,20 +137,21 @@ static int tipc_core_start(void) if (!res) res = tipc_nametbl_init(); if (!res) - res = tipc_subscr_start(); - if (!res) - res = tipc_cfg_init(); - if (!res) res = tipc_netlink_start(); if (!res) res = tipc_socket_init(); + if (!res) + res = tipc_register_sysctl(); + if (!res) + res = tipc_subscr_start(); + if (!res) + res = tipc_cfg_init(); if (res) tipc_core_stop(); return res; } - static int __init tipc_init(void) { int res; @@ -160,6 +163,11 @@ static int __init tipc_init(void) tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; + sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT; + res = tipc_core_start(); if (res) pr_err("Unable to start in single node mode\n"); diff --git a/net/tipc/core.h b/net/tipc/core.h index 0207db04179a..be72f8cebc53 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -1,8 +1,8 @@ /* * net/tipc/core.h: Include file for TIPC global declarations * - * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005-2007, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2013 Ericsson AB + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly; extern int tipc_max_ports __read_mostly; extern int tipc_net_id __read_mostly; extern int tipc_remote_management __read_mostly; +extern int sysctl_tipc_rmem[3] __read_mostly; /* * Other global variables @@ -96,6 +97,18 @@ extern int tipc_netlink_start(void); extern void tipc_netlink_stop(void); extern int tipc_socket_init(void); extern void tipc_socket_stop(void); +extern int tipc_sock_create_local(int type, struct socket **res); +extern void tipc_sock_release_local(struct socket *sock); +extern int tipc_sock_accept_local(struct socket *sock, + struct socket **newsock, int flags); + +#ifdef CONFIG_SYSCTL +extern int tipc_register_sysctl(void); +extern void tipc_unregister_sysctl(void); +#else +#define tipc_register_sysctl() 0 +#define tipc_unregister_sysctl() +#endif /* * TIPC timer and signal code diff --git a/net/tipc/discover.c b/net/tipc/discover.c index eedff58d0387..ecc758c6eacf 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -70,8 +70,7 @@ struct tipc_link_req { * @dest_domain: network domain of node(s) which should respond to message * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, - u32 dest_domain, +static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, struct tipc_bearer *b_ptr) { struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); @@ -346,8 +345,8 @@ exit: * * Returns 0 if successful, otherwise -errno. */ -int tipc_disc_create(struct tipc_bearer *b_ptr, - struct tipc_media_addr *dest, u32 dest_domain) +int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, + u32 dest_domain) { struct tipc_link_req *req; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 120a676a3360..40ea40cf6204 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -62,7 +62,7 @@ static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; static int eth_started; static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv); + void *dv); /* * Network device notifier info */ @@ -162,8 +162,7 @@ static void setup_bearer(struct work_struct *work) */ static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = NULL; - struct net_device *pdev = NULL; + struct net_device *dev; struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; @@ -178,15 +177,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /* Find device with specified name */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, pdev) { - if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { - dev = pdev; - dev_hold(dev); - break; - } - } - read_unlock(&dev_base_lock); + dev = dev_get_by_name(&init_net, driver_name); if (!dev) return -ENODEV; @@ -251,9 +242,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 2a2864c25e15..ad2e1ec4117e 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -155,8 +155,7 @@ static void setup_bearer(struct work_struct *work) */ static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = NULL; - struct net_device *pdev = NULL; + struct net_device *dev; struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; @@ -171,15 +170,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /* Find device with specified name */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, pdev) { - if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { - dev = pdev; - dev_hold(dev); - break; - } - } - read_unlock(&dev_base_lock); + dev = dev_get_by_name(&init_net, driver_name); if (!dev) return -ENODEV; @@ -244,9 +235,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; diff --git a/net/tipc/link.c b/net/tipc/link.c index a80feee5197a..0cc3d9015c5d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2,7 +2,7 @@ * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2007, 2012, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,6 +41,8 @@ #include "discover.h" #include "config.h" +#include <linux/pkt_sched.h> + /* * Error message prefixes */ @@ -771,8 +773,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) * link_bundle_buf(): Append contents of a buffer to * the tail of an existing one. */ -static int link_bundle_buf(struct tipc_link *l_ptr, - struct sk_buff *bundler, +static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler, struct sk_buff *buf) { struct tipc_msg *bundler_msg = buf_msg(bundler); @@ -1057,40 +1058,6 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, } /* - * tipc_send_buf_fast: Entry for data messages where the - * destination node is known and the header is complete, - * inclusive total message length. - * Returns user data length. - */ -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) -{ - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - int res; - u32 selector = msg_origport(buf_msg(buf)) & 1; - u32 dummy; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(destnode); - if (likely(n_ptr)) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector]; - if (likely(l_ptr)) { - res = link_send_buf_fast(l_ptr, buf, &dummy); - tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); - return res; - } - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - res = msg_data_sz(buf_msg(buf)); - tipc_reject_msg(buf, TIPC_ERR_NO_NODE); - return res; -} - - -/* * tipc_link_send_sections_fast: Entry for messages where the * destination processor is known and the header is complete, * except for total message length. @@ -1098,8 +1065,7 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) */ int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, - const u32 num_sect, - unsigned int total_len, + const u32 num_sect, unsigned int total_len, u32 destaddr) { struct tipc_msg *hdr = &sender->phdr; @@ -1115,7 +1081,10 @@ again: * (Must not hold any locks while building message.) */ res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, - sender->max_pkt, !sender->user_port, &buf); + sender->max_pkt, &buf); + /* Exit if build request was invalid */ + if (unlikely(res < 0)) + return res; read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); @@ -1132,10 +1101,6 @@ exit: return res; } - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - goto exit; - /* Exit if link (or bearer) is congested */ if (link_congested(l_ptr) || tipc_bearer_blocked(l_ptr->b_ptr)) { @@ -1189,8 +1154,7 @@ exit: */ static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, - u32 num_sect, - unsigned int total_len, + u32 num_sect, unsigned int total_len, u32 destaddr) { struct tipc_link *l_ptr; @@ -1204,6 +1168,7 @@ static int link_send_sections_long(struct tipc_port *sender, const unchar *sect_crs; int curr_sect; u32 fragm_no; + int res = 0; again: fragm_no = 1; @@ -1250,18 +1215,15 @@ again: else sz = fragm_rest; - if (likely(!sender->user_port)) { - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { + if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { + res = -EFAULT; error: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } - return -EFAULT; + for (; buf_chain; buf_chain = buf) { + buf = buf_chain->next; + kfree_skb(buf_chain); } - } else - skb_copy_to_linear_data_offset(buf, fragm_crs, - sect_crs, sz); + return res; + } sect_crs += sz; sect_rest -= sz; fragm_crs += sz; @@ -1281,8 +1243,10 @@ error: msg_set_fragm_no(&fragm_hdr, ++fragm_no); prev = buf; buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) + if (!buf) { + res = -ENOMEM; goto error; + } buf->next = NULL; prev->next = buf; @@ -1446,7 +1410,7 @@ static void link_reset_all(unsigned long addr) } static void link_retransmit_failure(struct tipc_link *l_ptr, - struct sk_buff *buf) + struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -1901,8 +1865,8 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, * Send protocol message to the other endpoint. */ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, - int probe_msg, u32 gap, u32 tolerance, - u32 priority, u32 ack_mtu) + int probe_msg, u32 gap, u32 tolerance, + u32 priority, u32 ack_mtu) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; @@ -1988,6 +1952,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, return; skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); + buf->priority = TC_PRIO_CONTROL; /* Defer message if bearer is already blocked */ if (tipc_bearer_blocked(l_ptr->b_ptr)) { @@ -2145,8 +2110,7 @@ exit: * another bearer. Owner node is locked. */ static void tipc_link_tunnel(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, - struct tipc_msg *msg, + struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, u32 selector) { struct tipc_link *tunnel; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index f2db8a87d9c5..ced60e2fc4f7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -51,8 +51,8 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m) } -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode) +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, + u32 destnode) { memset(m, 0, hsize); msg_set_version(m); @@ -73,8 +73,8 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, * Returns message data size or errno */ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - int max_size, int usrmem, struct sk_buff **buf) + u32 num_sect, unsigned int total_len, int max_size, + struct sk_buff **buf) { int dsz, sz, hsz, pos, res, cnt; @@ -92,14 +92,9 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, return -ENOMEM; skb_copy_to_linear_data(*buf, hdr, hsz); for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { - if (likely(usrmem)) - res = !copy_from_user((*buf)->data + pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - else - skb_copy_to_linear_data_offset(*buf, pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); + skb_copy_to_linear_data_offset(*buf, pos, + msg_sect[cnt].iov_base, + msg_sect[cnt].iov_len); pos += msg_sect[cnt].iov_len; } if (likely(res)) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ba2a72beea68..5e4ccf5c27df 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -719,9 +719,9 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) } u32 tipc_msg_tot_importance(struct tipc_msg *m); -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode); +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, + u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - int max_size, int usrmem, struct sk_buff **buf); + u32 num_sect, unsigned int total_len, int max_size, + struct sk_buff **buf); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 24b167914311..09dcd54b04e1 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -440,7 +440,7 @@ found: * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s) + struct tipc_subscription *s) { struct sub_seq *sseq = nseq->sseqs; @@ -662,7 +662,7 @@ exit: * tipc_nametbl_publish - add name publication to network name tables */ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) + u32 scope, u32 port_ref, u32 key) { struct publication *publ; @@ -753,7 +753,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) * subseq_list - print specified sub-sequence contents into the given buffer */ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, - u32 index) + u32 index) { char portIdStr[27]; const char *scope_str[] = {"", " zone", " cluster", " node"}; @@ -792,7 +792,7 @@ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, * nameseq_list - print specified name sequence contents into the given buffer */ static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, - u32 type, u32 lowbound, u32 upbound, u32 index) + u32 type, u32 lowbound, u32 upbound, u32 index) { struct sub_seq *sseq; char typearea[11]; @@ -849,7 +849,7 @@ static int nametbl_header(char *buf, int len, u32 depth) * nametbl_list - print specified name table contents into the given buffer */ static int nametbl_list(char *buf, int len, u32 depth_info, - u32 type, u32 lowbound, u32 upbound) + u32 type, u32 lowbound, u32 upbound) { struct hlist_head *seq_head; struct name_seq *seq; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 71cb4dc712df..f02f48b9a216 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -87,14 +87,15 @@ extern rwlock_t tipc_nametbl_lock; struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); + struct tipc_port_list *dports); struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key); + u32 scope, u32 port_ref, u32 key); int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key); + u32 scope, u32 node, u32 ref, + u32 key); +struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, + u32 ref, u32 key); void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(void); diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 5e34b015da45..8a7384c04add 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -42,7 +42,7 @@ * tipc_nodesub_subscribe - create "node down" subscription for specified node */ void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, - void *usr_handle, net_ev_handler handle_down) + void *usr_handle, net_ev_handler handle_down) { if (in_own_node(addr)) { node_sub->node = NULL; diff --git a/net/tipc/port.c b/net/tipc/port.c index 18098cac62f2..b3ed2fcab4fb 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -2,7 +2,7 @@ * net/tipc/port.c: TIPC port code * * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2008, 2010-2011, Wind River Systems + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,11 +46,7 @@ #define MAX_REJECT_SIZE 1024 -static struct sk_buff *msg_queue_head; -static struct sk_buff *msg_queue_tail; - DEFINE_SPINLOCK(tipc_port_list_lock); -static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); @@ -119,7 +115,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, msg_set_nameupper(hdr, seq->upper); msg_set_hdr_sz(hdr, MCAST_H_SIZE); res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - !oport->user_port, &buf); + &buf); if (unlikely(!buf)) return res; @@ -206,14 +202,15 @@ exit: } /** - * tipc_createport_raw - create a generic TIPC port + * tipc_createport - create a generic TIPC port * * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ -struct tipc_port *tipc_createport_raw(void *usr_handle, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance) +struct tipc_port *tipc_createport(struct sock *sk, + u32 (*dispatcher)(struct tipc_port *, + struct sk_buff *), + void (*wakeup)(struct tipc_port *), + const u32 importance) { struct tipc_port *p_ptr; struct tipc_msg *msg; @@ -231,14 +228,13 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, return NULL; } - p_ptr->usr_handle = usr_handle; + p_ptr->sk = sk; p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->ref = ref; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->dispatcher = dispatcher; p_ptr->wakeup = wakeup; - p_ptr->user_port = NULL; k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref); INIT_LIST_HEAD(&p_ptr->publications); INIT_LIST_HEAD(&p_ptr->port_list); @@ -275,7 +271,6 @@ int tipc_deleteport(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); } - kfree(p_ptr->user_port); spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); @@ -448,7 +443,7 @@ int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, int res; res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - !p_ptr->user_port, &buf); + &buf); if (!buf) return res; @@ -668,215 +663,6 @@ void tipc_port_reinit(void) spin_unlock_bh(&tipc_port_list_lock); } - -/* - * port_dispatcher_sigh(): Signal handler for messages destinated - * to the tipc_port interface. - */ -static void port_dispatcher_sigh(void *dummy) -{ - struct sk_buff *buf; - - spin_lock_bh(&queue_lock); - buf = msg_queue_head; - msg_queue_head = NULL; - spin_unlock_bh(&queue_lock); - - while (buf) { - struct tipc_port *p_ptr; - struct user_port *up_ptr; - struct tipc_portid orig; - struct tipc_name_seq dseq; - void *usr_handle; - int connected; - int peer_invalid; - int published; - u32 message_type; - - struct sk_buff *next = buf->next; - struct tipc_msg *msg = buf_msg(buf); - u32 dref = msg_destport(msg); - - message_type = msg_type(msg); - if (message_type > TIPC_DIRECT_MSG) - goto reject; /* Unsupported message type */ - - p_ptr = tipc_port_lock(dref); - if (!p_ptr) - goto reject; /* Port deleted while msg in queue */ - - orig.ref = msg_origport(msg); - orig.node = msg_orignode(msg); - up_ptr = p_ptr->user_port; - usr_handle = up_ptr->usr_handle; - connected = p_ptr->connected; - peer_invalid = connected && !tipc_port_peer_msg(p_ptr, msg); - published = p_ptr->published; - - if (unlikely(msg_errcode(msg))) - goto err; - - switch (message_type) { - - case TIPC_CONN_MSG:{ - tipc_conn_msg_event cb = up_ptr->conn_msg_cb; - u32 dsz; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb)) - goto reject; - if (unlikely(!connected)) { - if (tipc_connect(dref, &orig)) - goto reject; - } else if (peer_invalid) - goto reject; - dsz = msg_data_sz(msg); - if (unlikely(dsz && - (++p_ptr->conn_unacked >= - TIPC_FLOW_CONTROL_WIN))) - tipc_acknowledge(dref, - p_ptr->conn_unacked); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), dsz); - break; - } - case TIPC_DIRECT_MSG:{ - tipc_msg_event cb = up_ptr->msg_cb; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb || connected)) - goto reject; - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_importance(msg), - &orig); - break; - } - case TIPC_MCAST_MSG: - case TIPC_NAMED_MSG:{ - tipc_named_msg_event cb = up_ptr->named_msg_cb; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb || connected || !published)) - goto reject; - dseq.type = msg_nametype(msg); - dseq.lower = msg_nameinst(msg); - dseq.upper = (message_type == TIPC_NAMED_MSG) - ? dseq.lower : msg_nameupper(msg); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_importance(msg), - &orig, &dseq); - break; - } - } - if (buf) - kfree_skb(buf); - buf = next; - continue; -err: - switch (message_type) { - - case TIPC_CONN_MSG:{ - tipc_conn_shutdown_event cb = - up_ptr->conn_err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || !connected || peer_invalid) - break; - tipc_disconnect(dref); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg)); - break; - } - case TIPC_DIRECT_MSG:{ - tipc_msg_err_event cb = up_ptr->err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || connected) - break; - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg), &orig); - break; - } - case TIPC_MCAST_MSG: - case TIPC_NAMED_MSG:{ - tipc_named_msg_err_event cb = - up_ptr->named_err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || connected) - break; - dseq.type = msg_nametype(msg); - dseq.lower = msg_nameinst(msg); - dseq.upper = (message_type == TIPC_NAMED_MSG) - ? dseq.lower : msg_nameupper(msg); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg), &dseq); - break; - } - } - if (buf) - kfree_skb(buf); - buf = next; - continue; -reject: - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); - buf = next; - } -} - -/* - * port_dispatcher(): Dispatcher for messages destinated - * to the tipc_port interface. Called with port locked. - */ -static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf) -{ - buf->next = NULL; - spin_lock_bh(&queue_lock); - if (msg_queue_head) { - msg_queue_tail->next = buf; - msg_queue_tail = buf; - } else { - msg_queue_tail = msg_queue_head = buf; - tipc_k_signal((Handler)port_dispatcher_sigh, 0); - } - spin_unlock_bh(&queue_lock); - return 0; -} - -/* - * Wake up port after congestion: Called with port locked - */ -static void port_wakeup_sh(unsigned long ref) -{ - struct tipc_port *p_ptr; - struct user_port *up_ptr; - tipc_continue_event cb = NULL; - void *uh = NULL; - - p_ptr = tipc_port_lock(ref); - if (p_ptr) { - up_ptr = p_ptr->user_port; - if (up_ptr) { - cb = up_ptr->continue_event_cb; - uh = up_ptr->usr_handle; - } - tipc_port_unlock(p_ptr); - } - if (cb) - cb(uh, ref); -} - - -static void port_wakeup(struct tipc_port *p_ptr) -{ - tipc_k_signal((Handler)port_wakeup_sh, p_ptr->ref); -} - void tipc_acknowledge(u32 ref, u32 ack) { struct tipc_port *p_ptr; @@ -893,50 +679,6 @@ void tipc_acknowledge(u32 ref, u32 ack) tipc_net_route_msg(buf); } -/* - * tipc_createport(): user level call. - */ -int tipc_createport(void *usr_handle, - unsigned int importance, - tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, - tipc_msg_event msg_cb, - tipc_named_msg_event named_msg_cb, - tipc_conn_msg_event conn_msg_cb, - tipc_continue_event continue_event_cb, /* May be zero */ - u32 *portref) -{ - struct user_port *up_ptr; - struct tipc_port *p_ptr; - - up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); - if (!up_ptr) { - pr_warn("Port creation failed, no memory\n"); - return -ENOMEM; - } - p_ptr = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, - importance); - if (!p_ptr) { - kfree(up_ptr); - return -ENOMEM; - } - - p_ptr->user_port = up_ptr; - up_ptr->usr_handle = usr_handle; - up_ptr->ref = p_ptr->ref; - up_ptr->err_cb = error_cb; - up_ptr->named_err_cb = named_error_cb; - up_ptr->conn_err_cb = conn_error_cb; - up_ptr->msg_cb = msg_cb; - up_ptr->named_msg_cb = named_msg_cb; - up_ptr->conn_msg_cb = conn_msg_cb; - up_ptr->continue_event_cb = continue_event_cb; - *portref = p_ptr->ref; - tipc_port_unlock(p_ptr); - return 0; -} - int tipc_portimportance(u32 ref, unsigned int *importance) { struct tipc_port *p_ptr; @@ -1184,7 +926,7 @@ static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_se int res; res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len, - MAX_MSG_SIZE, !sender->user_port, &buf); + MAX_MSG_SIZE, &buf); if (likely(buf)) tipc_port_recv_msg(buf); return res; @@ -1322,43 +1064,3 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, } return -ELINKCONG; } - -/** - * tipc_send_buf2port - send message buffer to port identity - */ -int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, - struct sk_buff *buf, unsigned int dsz) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg; - int res; - - p_ptr = (struct tipc_port *)tipc_ref_deref(ref); - if (!p_ptr || p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - msg_set_size(msg, BASIC_H_SIZE + dsz); - if (skb_cow(buf, BASIC_H_SIZE)) - return -ENOMEM; - - skb_push(buf, BASIC_H_SIZE); - skb_copy_to_linear_data(buf, msg, BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_recv_msg(buf); - else - res = tipc_send_buf_fast(buf, dest->node); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (port_unreliable(p_ptr)) - return dsz; - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index fb66e2e5f4d1..5a7026b9c345 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -2,7 +2,7 @@ * net/tipc/port.h: Include file for TIPC port code * * Copyright (c) 1994-2007, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,60 +43,12 @@ #include "node_subscr.h" #define TIPC_FLOW_CONTROL_WIN 512 - -typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason, - struct tipc_portid const *attmpt_destid); - -typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason, - struct tipc_name_seq const *attmpt_dest); - -typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason); - -typedef void (*tipc_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, unsigned int importance, - struct tipc_portid const *origin); - -typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, unsigned int importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest); - -typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size); - -typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); - -/** - * struct user_port - TIPC user port (used with native API) - * @usr_handle: user-specified field - * @ref: object reference to associated TIPC port - * - * <various callback routines> - */ -struct user_port { - void *usr_handle; - u32 ref; - tipc_msg_err_event err_cb; - tipc_named_msg_err_event named_err_cb; - tipc_conn_shutdown_event conn_err_cb; - tipc_msg_event msg_cb; - tipc_named_msg_event named_msg_cb; - tipc_conn_msg_event conn_msg_cb; - tipc_continue_event continue_event_cb; -}; +#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) /** * struct tipc_port - TIPC port structure - * @usr_handle: pointer to additional user-defined information about port + * @sk: pointer to socket handle * @lock: pointer to spinlock for controlling access to port * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established @@ -110,7 +62,6 @@ struct user_port { * @port_list: adjacent ports in TIPC's global list of ports * @dispatcher: ptr to routine which handles received messages * @wakeup: ptr to routine to call when port is no longer congested - * @user_port: ptr to user port associated with port (if any) * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: * @sent: # of non-empty messages sent by port @@ -123,7 +74,7 @@ struct user_port { * @subscription: "node down" subscription used to terminate failed connections */ struct tipc_port { - void *usr_handle; + struct sock *sk; spinlock_t *lock; int connected; u32 conn_type; @@ -137,7 +88,6 @@ struct tipc_port { struct list_head port_list; u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); void (*wakeup)(struct tipc_port *); - struct user_port *user_port; struct list_head wait_list; u32 waiting_pkts; u32 sent; @@ -156,24 +106,16 @@ struct tipc_port_list; /* * TIPC port manipulation routines */ -struct tipc_port *tipc_createport_raw(void *usr_handle, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void (*wakeup)(struct tipc_port *), const u32 importance); +struct tipc_port *tipc_createport(struct sock *sk, + u32 (*dispatcher)(struct tipc_port *, + struct sk_buff *), + void (*wakeup)(struct tipc_port *), + const u32 importance); int tipc_reject_msg(struct sk_buff *buf, u32 err); -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); - void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_createport(void *usr_handle, - unsigned int importance, tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb, - tipc_named_msg_event named_msg_cb, - tipc_conn_msg_event conn_msg_cb, - tipc_continue_event continue_event_cb, u32 *portref); - int tipc_deleteport(u32 portref); int tipc_portimportance(u32 portref, unsigned int *importance); @@ -186,9 +128,9 @@ int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); int tipc_publish(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); + struct tipc_name_seq const *name_seq); int tipc_withdraw(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); + struct tipc_name_seq const *name_seq); int tipc_connect(u32 portref, struct tipc_portid const *port); @@ -220,9 +162,6 @@ int tipc_send2port(u32 portref, struct tipc_portid const *dest, unsigned int num_sect, struct iovec const *msg_sect, unsigned int total_len); -int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, - struct sk_buff *buf, unsigned int dsz); - int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, unsigned int section_count, struct iovec const *msg, unsigned int total_len); diff --git a/net/tipc/server.c b/net/tipc/server.c new file mode 100644 index 000000000000..19da5abe0fa6 --- /dev/null +++ b/net/tipc/server.c @@ -0,0 +1,596 @@ +/* + * net/tipc/server.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include "core.h" +#include <net/sock.h> + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +#define CF_CONNECTED 1 + +#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @rwork: receive work item + * @usr_data: user-specified field + * @rx_action: what to do when connection socket is active + * @outqueue: pointer to first outbound message in queue + * @outqueue_lock: controll access to the outqueue + * @outqueue: list of connection objects for its server + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_server *server; + struct work_struct rwork; + int (*rx_action) (struct tipc_conn *con); + void *usr_data; + struct list_head outqueue; + spinlock_t outqueue_lock; + struct work_struct swork; +}; + +/* An entry waiting to be sent */ +struct outqueue_entry { + struct list_head list; + struct kvec iov; + struct sockaddr_tipc dest; +}; + +static void tipc_recv_work(struct work_struct *work); +static void tipc_send_work(struct work_struct *work); +static void tipc_clean_outqueues(struct tipc_conn *con); + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_server *s = con->server; + + if (con->sock) { + tipc_sock_release_local(con->sock); + con->sock = NULL; + } + + tipc_clean_outqueues(con); + + if (con->conid) + s->tipc_conn_shutdown(con->conid, con->usr_data); + + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (con) + conn_get(con); + spin_unlock_bh(&s->idr_lock); + return con; +} + +static void sock_data_ready(struct sock *sk, int unused) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void sock_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) +{ + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_data_ready = sock_data_ready; + sk->sk_write_space = sock_write_space; + sk->sk_user_data = con; + + con->sock = sock; + + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_unregister_callbacks(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_close_conn(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + + tipc_unregister_callbacks(con); + + /* We shouldn't flush pending works as we may be in the + * thread. In fact the races with pending rx/tx work structs + * are harmless for us here as we have already deleted this + * connection from server connection list and set + * sk->sk_user_data to 0 before releasing connection object. + */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); + } +} + +static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + spin_lock_init(&con->outqueue_lock); + INIT_WORK(&con->swork, tipc_send_work); + INIT_WORK(&con->rwork, tipc_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + + return con; +} + +static int tipc_receive_from_sock(struct tipc_conn *con) +{ + struct msghdr msg = {}; + struct tipc_server *s = con->server; + struct sockaddr_tipc addr; + struct kvec iov; + void *buf; + int ret; + + buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC); + if (!buf) { + ret = -ENOMEM; + goto out_close; + } + + iov.iov_base = buf; + iov.iov_len = s->max_rcvbuf_size; + msg.msg_name = &addr; + ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, + MSG_DONTWAIT); + if (ret <= 0) { + kmem_cache_free(s->rcvbuf_cache, buf); + goto out_close; + } + + s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); + + kmem_cache_free(s->rcvbuf_cache, buf); + + return 0; + +out_close: + if (ret != -EWOULDBLOCK) + tipc_close_conn(con); + else if (ret == 0) + /* Don't return success if we really got EOF */ + ret = -EAGAIN; + + return ret; +} + +static int tipc_accept_from_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = con->sock; + struct socket *newsock; + struct tipc_conn *newcon; + int ret; + + ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK); + if (ret < 0) + return ret; + + newcon = tipc_alloc_conn(con->server); + if (IS_ERR(newcon)) { + ret = PTR_ERR(newcon); + sock_release(newsock); + return ret; + } + + newcon->rx_action = tipc_receive_from_sock; + tipc_register_callbacks(newsock, newcon); + + /* Notify that new connection is incoming */ + newcon->usr_data = s->tipc_conn_new(newcon->conid); + + /* Wake up receive process in case of 'SYN+' message */ + newsock->sk->sk_data_ready(newsock->sk, 0); + return ret; +} + +static struct socket *tipc_create_listen_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = NULL; + int ret; + + ret = tipc_sock_create_local(s->type, &sock); + if (ret < 0) + return NULL; + ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, + (char *)&s->imp, sizeof(s->imp)); + if (ret < 0) + goto create_err; + ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr)); + if (ret < 0) + goto create_err; + + switch (s->type) { + case SOCK_STREAM: + case SOCK_SEQPACKET: + con->rx_action = tipc_accept_from_sock; + + ret = kernel_listen(sock, 0); + if (ret < 0) + goto create_err; + break; + case SOCK_DGRAM: + case SOCK_RDM: + con->rx_action = tipc_receive_from_sock; + break; + default: + pr_err("Unknown socket type %d\n", s->type); + goto create_err; + } + return sock; + +create_err: + sock_release(sock); + con->sock = NULL; + return NULL; +} + +static int tipc_open_listening_sock(struct tipc_server *s) +{ + struct socket *sock; + struct tipc_conn *con; + + con = tipc_alloc_conn(s); + if (IS_ERR(con)) + return PTR_ERR(con); + + sock = tipc_create_listen_sock(con); + if (!sock) + return -EINVAL; + + tipc_register_callbacks(sock, con); + return 0; +} + +static struct outqueue_entry *tipc_alloc_entry(void *data, int len) +{ + struct outqueue_entry *entry; + void *buf; + + entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC); + if (!entry) + return NULL; + + buf = kmalloc(len, GFP_ATOMIC); + if (!buf) { + kfree(entry); + return NULL; + } + + memcpy(buf, data, len); + entry->iov.iov_base = buf; + entry->iov.iov_len = len; + + return entry; +} + +static void tipc_free_entry(struct outqueue_entry *e) +{ + kfree(e->iov.iov_base); + kfree(e); +} + +static void tipc_clean_outqueues(struct tipc_conn *con) +{ + struct outqueue_entry *e, *safe; + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len) +{ + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (!con) + return -EINVAL; + + e = tipc_alloc_entry(data, len); + if (!e) { + conn_put(con); + return -ENOMEM; + } + + if (addr) + memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc)); + + spin_lock_bh(&con->outqueue_lock); + list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (test_bit(CF_CONNECTED, &con->flags)) + if (!queue_work(s->send_wq, &con->swork)) + conn_put(con); + + return 0; +} + +void tipc_conn_terminate(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (con) { + tipc_close_conn(con); + conn_put(con); + } +} + +static void tipc_send_to_sock(struct tipc_conn *con) +{ + int count = 0; + struct tipc_server *s = con->server; + struct outqueue_entry *e; + struct msghdr msg; + int ret; + + spin_lock_bh(&con->outqueue_lock); + while (1) { + e = list_entry(con->outqueue.next, struct outqueue_entry, + list); + if ((struct list_head *) e == &con->outqueue) + break; + spin_unlock_bh(&con->outqueue_lock); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + + if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { + msg.msg_name = &e->dest; + msg.msg_namelen = sizeof(struct sockaddr_tipc); + } + ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, + e->iov.iov_len); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + goto out; + } else if (ret < 0) { + goto send_err; + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +out: + return; + +send_err: + tipc_close_conn(con); +} + +static void tipc_recv_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while (test_bit(CF_CONNECTED, &con->flags)) { + if (con->rx_action(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +static void tipc_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (test_bit(CF_CONNECTED, &con->flags)) + tipc_send_to_sock(con); + + conn_put(con); +} + +static void tipc_work_stop(struct tipc_server *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +static int tipc_work_start(struct tipc_server *s) +{ + s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); + if (!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +int tipc_server_start(struct tipc_server *s) +{ + int ret; + + spin_lock_init(&s->idr_lock); + idr_init(&s->conn_idr); + s->idr_in_use = 0; + + s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!s->rcvbuf_cache) + return -ENOMEM; + + ret = tipc_work_start(s); + if (ret < 0) { + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } + s->enabled = 1; + + return tipc_open_listening_sock(s); +} + +void tipc_server_stop(struct tipc_server *s) +{ + struct tipc_conn *con; + int total = 0; + int id; + + if (!s->enabled) + return; + + s->enabled = 0; + spin_lock_bh(&s->idr_lock); + for (id = 0; total < s->idr_in_use; id++) { + con = idr_find(&s->conn_idr, id); + if (con) { + total++; + spin_unlock_bh(&s->idr_lock); + tipc_close_conn(con); + spin_lock_bh(&s->idr_lock); + } + } + spin_unlock_bh(&s->idr_lock); + + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + idr_destroy(&s->conn_idr); +} diff --git a/net/tipc/server.h b/net/tipc/server.h new file mode 100644 index 000000000000..98b23f20bc0f --- /dev/null +++ b/net/tipc/server.h @@ -0,0 +1,94 @@ +/* + * net/tipc/server.h: Include file for TIPC server code + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_SERVER_H +#define _TIPC_SERVER_H + +#include "core.h" + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_server - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @rcvbuf_cache: memory cache of server receive buffer + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @max_rcvbuf_size: maximum permitted receive message length + * @tipc_conn_new: callback will be called when new connection is incoming + * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_recvmsg: callback will be called when message arrives + * @saddr: TIPC server address + * @name: server name + * @imp: message importance + * @type: socket type + * @enabled: identify whether server is launched or not + */ +struct tipc_server { + struct idr conn_idr; + spinlock_t idr_lock; + int idr_in_use; + struct kmem_cache *rcvbuf_cache; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + int max_rcvbuf_size; + void *(*tipc_conn_new) (int conid); + void (*tipc_conn_shutdown) (int conid, void *usr_data); + void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len); + struct sockaddr_tipc *saddr; + const char name[TIPC_SERVER_NAME_LEN]; + int imp; + int type; + int enabled; +}; + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len); + +/** + * tipc_conn_terminate - terminate connection with server + * + * Note: Must call it in process context since it might sleep + */ +void tipc_conn_terminate(struct tipc_server *s, int conid); + +int tipc_server_start(struct tipc_server *s); + +void tipc_server_stop(struct tipc_server *s); + +#endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 515ce38e4f4c..ce8249c76827 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2,7 +2,7 @@ * net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, 2012 Ericsson AB - * Copyright (c) 2004-2008, 2010-2012, Wind River Systems + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,8 +43,6 @@ #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ struct tipc_sock { @@ -65,12 +63,15 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); static void tipc_data_ready(struct sock *sk, int len); static void tipc_write_space(struct sock *sk); +static int release(struct socket *sock); +static int accept(struct socket *sock, struct socket *new_sock, int flags); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; static struct proto tipc_proto; +static struct proto tipc_proto_kern; static int sockets_enabled; @@ -143,7 +144,7 @@ static void reject_rx_queue(struct sock *sk) } /** - * tipc_create - create a TIPC socket + * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) * @sock: pre-allocated socket structure * @protocol: protocol indicator (must be 0) @@ -154,8 +155,8 @@ static void reject_rx_queue(struct sock *sk) * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct net *net, struct socket *sock, int protocol, - int kern) +static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, + int kern) { const struct proto_ops *ops; socket_state state; @@ -185,13 +186,17 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, } /* Allocate socket's protocol area */ - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + if (!kern) + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + else + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern); + if (sk == NULL) return -ENOMEM; /* Allocate TIPC port for socket to use */ - tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, - TIPC_LOW_IMPORTANCE); + tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch, + TIPC_LOW_IMPORTANCE); if (unlikely(!tp_ptr)) { sk_free(sk); return -ENOMEM; @@ -203,6 +208,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; + sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tipc_sk(sk)->p = tp_ptr; @@ -220,6 +226,78 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, } /** + * tipc_sock_create_local - create TIPC socket from inside TIPC module + * @type: socket type - SOCK_RDM or SOCK_SEQPACKET + * + * We cannot use sock_creat_kern here because it bumps module user count. + * Since socket owner and creator is the same module we must make sure + * that module count remains zero for module local sockets, otherwise + * we cannot do rmmod. + * + * Returns 0 on success, errno otherwise + */ +int tipc_sock_create_local(int type, struct socket **res) +{ + int rc; + struct sock *sk; + + rc = sock_create_lite(AF_TIPC, type, 0, res); + if (rc < 0) { + pr_err("Failed to create kernel socket\n"); + return rc; + } + tipc_sk_create(&init_net, *res, 0, 1); + + sk = (*res)->sk; + + return 0; +} + +/** + * tipc_sock_release_local - release socket created by tipc_sock_create_local + * @sock: the socket to be released. + * + * Module reference count is not incremented when such sockets are created, + * so we must keep it from being decremented when they are released. + */ +void tipc_sock_release_local(struct socket *sock) +{ + release(sock); + sock->ops = NULL; + sock_release(sock); +} + +/** + * tipc_sock_accept_local - accept a connection on a socket created + * with tipc_sock_create_local. Use this function to avoid that + * module reference count is inadvertently incremented. + * + * @sock: the accepting socket + * @newsock: reference to the new socket to be created + * @flags: socket flags + */ + +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags) +{ + struct sock *sk = sock->sk; + int ret; + + ret = sock_create_lite(sk->sk_family, sk->sk_type, + sk->sk_protocol, newsock); + if (ret < 0) + return ret; + + ret = accept(sock, *newsock, flags); + if (ret < 0) { + sock_release(*newsock); + return ret; + } + (*newsock)->ops = sock->ops; + return ret; +} + +/** * release - destroy a TIPC socket * @sock: socket to destroy * @@ -324,7 +402,9 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) else if (addr->addrtype != TIPC_ADDR_NAMESEQ) return -EAFNOSUPPORT; - if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) + if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && + (addr->addr.nameseq.type != TIPC_TOP_SRV) && + (addr->addr.nameseq.type != TIPC_CFG_SRV)) return -EACCES; return (addr->scope > 0) ? @@ -519,8 +599,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, res = -EISCONN; goto exit; } - if ((tport->published) || - ((sock->type == SOCK_STREAM) && (total_len != 0))) { + if (tport->published) { res = -EOPNOTSUPP; goto exit; } @@ -810,7 +889,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) * Returns 0 if successful, otherwise errno */ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, - struct tipc_port *tport) + struct tipc_port *tport) { u32 anc_data[3]; u32 err; @@ -1011,8 +1090,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, lock_sock(sk); - if (unlikely((sock->state == SS_UNCONNECTED) || - (sock->state == SS_CONNECTING))) { + if (unlikely((sock->state == SS_UNCONNECTED))) { res = -ENOTCONN; goto exit; } @@ -1233,10 +1311,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) * For all connectionless messages, by default new queue limits are * as belows: * - * TIPC_LOW_IMPORTANCE (5MB) - * TIPC_MEDIUM_IMPORTANCE (10MB) - * TIPC_HIGH_IMPORTANCE (20MB) - * TIPC_CRITICAL_IMPORTANCE (40MB) + * TIPC_LOW_IMPORTANCE (4 MB) + * TIPC_MEDIUM_IMPORTANCE (8 MB) + * TIPC_HIGH_IMPORTANCE (16 MB) + * TIPC_CRITICAL_IMPORTANCE (32 MB) * * Returns overload limit according to corresponding message importance */ @@ -1246,9 +1324,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) unsigned int limit; if (msg_connected(msg)) - limit = CONN_OVERLOAD_LIMIT; + limit = sysctl_tipc_rmem[2]; else - limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); + limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << + msg_importance(msg); return limit; } @@ -1327,7 +1406,7 @@ static int backlog_rcv(struct sock *sk, struct sk_buff *buf) */ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) { - struct sock *sk = (struct sock *)tport->usr_handle; + struct sock *sk = tport->sk; u32 res; /* @@ -1358,7 +1437,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) */ static void wakeupdispatch(struct tipc_port *tport) { - struct sock *sk = (struct sock *)tport->usr_handle; + struct sock *sk = tport->sk; sk->sk_write_space(sk); } @@ -1531,7 +1610,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_create(sock_net(sock->sk), new_sock, 0, 0); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; @@ -1657,8 +1736,8 @@ restart: * * Returns 0 on success, errno otherwise */ -static int setsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, unsigned int ol) +static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, + unsigned int ol) { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); @@ -1716,8 +1795,8 @@ static int setsockopt(struct socket *sock, * * Returns 0 on success, errno otherwise */ -static int getsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, int __user *ol) +static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, + int __user *ol) { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); @@ -1841,13 +1920,20 @@ static const struct proto_ops stream_ops = { static const struct net_proto_family tipc_family_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .create = tipc_create + .create = tipc_sk_create }; static struct proto tipc_proto = { .name = "TIPC", .owner = THIS_MODULE, - .obj_size = sizeof(struct tipc_sock) + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem +}; + +static struct proto tipc_proto_kern = { + .name = "TIPC", + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem }; /** diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 6b42d47029af..d38bb45d82e9 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -2,7 +2,7 @@ * net/tipc/subscr.c: TIPC network topology service * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2007, 2010-2011, Wind River Systems + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,33 +41,42 @@ /** * struct tipc_subscriber - TIPC network topology subscriber - * @port_ref: object reference to server port connecting to subscriber - * @lock: pointer to spinlock controlling access to subscriber's server port - * @subscriber_list: adjacent subscribers in top. server's list of subscribers + * @conid: connection identifier to server connecting to subscriber + * @lock: controll access to subscriber * @subscription_list: list of subscription objects for this subscriber */ struct tipc_subscriber { - u32 port_ref; - spinlock_t *lock; - struct list_head subscriber_list; + int conid; + spinlock_t lock; struct list_head subscription_list; }; -/** - * struct top_srv - TIPC network topology subscription service - * @setup_port: reference to TIPC port that handles subscription requests - * @subscription_count: number of active subscriptions (not subscribers!) - * @subscriber_list: list of ports subscribing to service - * @lock: spinlock govering access to subscriber list - */ -struct top_srv { - u32 setup_port; - atomic_t subscription_count; - struct list_head subscriber_list; - spinlock_t lock; +static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len); +static void *subscr_named_msg_event(int conid); +static void subscr_conn_shutdown_event(int conid, void *usr_data); + +static atomic_t subscription_count = ATOMIC_INIT(0); + +static struct sockaddr_tipc topsrv_addr __read_mostly = { + .family = AF_TIPC, + .addrtype = TIPC_ADDR_NAMESEQ, + .addr.nameseq.type = TIPC_TOP_SRV, + .addr.nameseq.lower = TIPC_TOP_SRV, + .addr.nameseq.upper = TIPC_TOP_SRV, + .scope = TIPC_NODE_SCOPE }; -static struct top_srv topsrv; +static struct tipc_server topsrv __read_mostly = { + .saddr = &topsrv_addr, + .imp = TIPC_CRITICAL_IMPORTANCE, + .type = SOCK_SEQPACKET, + .max_rcvbuf_size = sizeof(struct tipc_subscr), + .name = "topology_server", + .tipc_conn_recvmsg = subscr_conn_msg_event, + .tipc_conn_new = subscr_named_msg_event, + .tipc_conn_shutdown = subscr_conn_shutdown_event, +}; /** * htohl - convert value to endianness used by destination @@ -81,20 +90,13 @@ static u32 htohl(u32 in, int swap) return swap ? swab32(in) : in; } -/** - * subscr_send_event - send a message containing a tipc_event to the subscriber - * - * Note: Must not hold subscriber's server port lock, since tipc_send() will - * try to take the lock if the message is rejected and returned! - */ -static void subscr_send_event(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, +static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, u32 node) { - struct iovec msg_sect; + struct tipc_subscriber *subscriber = sub->subscriber; + struct kvec msg_sect; + int ret; msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); @@ -104,7 +106,10 @@ static void subscr_send_event(struct tipc_subscription *sub, sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - tipc_send(sub->server_ref, 1, &msg_sect, msg_sect.iov_len); + ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); + if (ret < 0) + pr_err("Sending subscription event failed, no memory\n"); } /** @@ -112,10 +117,8 @@ static void subscr_send_event(struct tipc_subscription *sub, * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscr_overlap(struct tipc_subscription *sub, - u32 found_lower, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper) - { if (found_lower < sub->seq.lower) found_lower = sub->seq.lower; @@ -131,13 +134,9 @@ int tipc_subscr_overlap(struct tipc_subscription *sub, * * Protected by nameseq.lock in name_table.c */ -void tipc_subscr_report_overlap(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, - u32 node, - int must) +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must) { if (!tipc_subscr_overlap(sub, found_lower, found_upper)) return; @@ -147,21 +146,24 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub, subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } -/** - * subscr_timeout - subscription timeout has occurred - */ static void subscr_timeout(struct tipc_subscription *sub) { - struct tipc_port *server_port; + struct tipc_subscriber *subscriber = sub->subscriber; + + /* The spin lock per subscriber is used to protect its members */ + spin_lock_bh(&subscriber->lock); - /* Validate server port reference (in case subscriber is terminating) */ - server_port = tipc_port_lock(sub->server_ref); - if (server_port == NULL) + /* Validate if the connection related to the subscriber is + * closed (in case subscriber is terminating) + */ + if (subscriber->conid == 0) { + spin_unlock_bh(&subscriber->lock); return; + } /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { - tipc_port_unlock(server_port); + spin_unlock_bh(&subscriber->lock); return; } @@ -171,8 +173,7 @@ static void subscr_timeout(struct tipc_subscription *sub) /* Unlink subscription from subscriber */ list_del(&sub->subscription_list); - /* Release subscriber's server port */ - tipc_port_unlock(server_port); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -181,64 +182,54 @@ static void subscr_timeout(struct tipc_subscription *sub) /* Now destroy subscription */ k_term_timer(&sub->timer); kfree(sub); - atomic_dec(&topsrv.subscription_count); + atomic_dec(&subscription_count); } /** * subscr_del - delete a subscription within a subscription list * - * Called with subscriber port locked. + * Called with subscriber lock held. */ static void subscr_del(struct tipc_subscription *sub) { tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); kfree(sub); - atomic_dec(&topsrv.subscription_count); + atomic_dec(&subscription_count); } /** * subscr_terminate - terminate communication with a subscriber * - * Called with subscriber port locked. Routine must temporarily release lock - * to enable subscription timeout routine(s) to finish without deadlocking; - * the lock is then reclaimed to allow caller to release it upon return. - * (This should work even in the unlikely event some other thread creates - * a new object reference in the interim that uses this lock; this routine will - * simply wait for it to be released, then claim it.) + * Note: Must call it in process context since it might sleep. */ static void subscr_terminate(struct tipc_subscriber *subscriber) { - u32 port_ref; + tipc_conn_terminate(&topsrv, subscriber->conid); +} + +static void subscr_release(struct tipc_subscriber *subscriber) +{ struct tipc_subscription *sub; struct tipc_subscription *sub_temp; - /* Invalidate subscriber reference */ - port_ref = subscriber->port_ref; - subscriber->port_ref = 0; - spin_unlock_bh(subscriber->lock); + spin_lock_bh(&subscriber->lock); - /* Sever connection to subscriber */ - tipc_shutdown(port_ref); - tipc_deleteport(port_ref); + /* Invalidate subscriber reference */ + subscriber->conid = 0; /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { if (sub->timeout != TIPC_WAIT_FOREVER) { + spin_unlock_bh(&subscriber->lock); k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); + spin_lock_bh(&subscriber->lock); } subscr_del(sub); } - - /* Remove subscriber from topology server's subscriber list */ - spin_lock_bh(&topsrv.lock); - list_del(&subscriber->subscriber_list); - spin_unlock_bh(&topsrv.lock); - - /* Reclaim subscriber lock */ - spin_lock_bh(subscriber->lock); + spin_unlock_bh(&subscriber->lock); /* Now destroy subscriber */ kfree(subscriber); @@ -247,7 +238,7 @@ static void subscr_terminate(struct tipc_subscriber *subscriber) /** * subscr_cancel - handle subscription cancellation request * - * Called with subscriber port locked. Routine must temporarily release lock + * Called with subscriber lock held. Routine must temporarily release lock * to enable the subscription timeout routine to finish without deadlocking; * the lock is then reclaimed to allow caller to release it upon return. * @@ -274,10 +265,10 @@ static void subscr_cancel(struct tipc_subscr *s, /* Cancel subscription timer (if used), then delete subscription */ if (sub->timeout != TIPC_WAIT_FOREVER) { sub->timeout = TIPC_WAIT_FOREVER; - spin_unlock_bh(subscriber->lock); + spin_unlock_bh(&subscriber->lock); k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); - spin_lock_bh(subscriber->lock); + spin_lock_bh(&subscriber->lock); } subscr_del(sub); } @@ -285,7 +276,7 @@ static void subscr_cancel(struct tipc_subscr *s, /** * subscr_subscribe - create subscription for subscriber * - * Called with subscriber port locked. + * Called with subscriber lock held. */ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, struct tipc_subscriber *subscriber) @@ -304,7 +295,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); subscr_terminate(subscriber); @@ -335,10 +326,10 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); - sub->server_ref = subscriber->port_ref; + sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&topsrv.subscription_count); + atomic_inc(&subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { k_init_timer(&sub->timer, (Handler)subscr_timeout, (unsigned long)sub); @@ -348,196 +339,51 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, return sub; } -/** - * subscr_conn_shutdown_event - handle termination request from subscriber - * - * Called with subscriber's server port unlocked. - */ -static void subscr_conn_shutdown_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size, - int reason) +/* Handle one termination request for the subscriber */ +static void subscr_conn_shutdown_event(int conid, void *usr_data) { - struct tipc_subscriber *subscriber = usr_handle; - spinlock_t *subscriber_lock; - - if (tipc_port_lock(port_ref) == NULL) - return; - - subscriber_lock = subscriber->lock; - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); + subscr_release((struct tipc_subscriber *)usr_data); } -/** - * subscr_conn_msg_event - handle new subscription request from subscriber - * - * Called with subscriber's server port unlocked. - */ -static void subscr_conn_msg_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - const unchar *data, - u32 size) +/* Handle one request to create a new subscription for the subscriber */ +static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len) { - struct tipc_subscriber *subscriber = usr_handle; - spinlock_t *subscriber_lock; + struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub; - /* - * Lock subscriber's server port (& make a local copy of lock pointer, - * in case subscriber is deleted while processing subscription request) - */ - if (tipc_port_lock(port_ref) == NULL) - return; - - subscriber_lock = subscriber->lock; - - if (size != sizeof(struct tipc_subscr)) { - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); - } else { - sub = subscr_subscribe((struct tipc_subscr *)data, subscriber); - spin_unlock_bh(subscriber_lock); - if (sub != NULL) { - - /* - * We must release the server port lock before adding a - * subscription to the name table since TIPC needs to be - * able to (re)acquire the port lock if an event message - * issued by the subscription process is rejected and - * returned. The subscription cannot be deleted while - * it is being added to the name table because: - * a) the single-threading of the native API port code - * ensures the subscription cannot be cancelled and - * the subscriber connection cannot be broken, and - * b) the name table lock ensures the subscription - * timeout code cannot delete the subscription, - * so the subscription object is still protected. - */ - tipc_nametbl_subscribe(sub); - } - } + spin_lock_bh(&subscriber->lock); + sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber); + if (sub) + tipc_nametbl_subscribe(sub); + spin_unlock_bh(&subscriber->lock); } -/** - * subscr_named_msg_event - handle request to establish a new subscriber - */ -static void subscr_named_msg_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - const unchar *data, - u32 size, - u32 importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest) + +/* Handle one request to establish a new subscriber */ +static void *subscr_named_msg_event(int conid) { struct tipc_subscriber *subscriber; - u32 server_port_ref; /* Create subscriber object */ subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); if (subscriber == NULL) { pr_warn("Subscriber rejected, no memory\n"); - return; + return NULL; } INIT_LIST_HEAD(&subscriber->subscription_list); - INIT_LIST_HEAD(&subscriber->subscriber_list); - - /* Create server port & establish connection to subscriber */ - tipc_createport(subscriber, - importance, - NULL, - NULL, - subscr_conn_shutdown_event, - NULL, - NULL, - subscr_conn_msg_event, - NULL, - &subscriber->port_ref); - if (subscriber->port_ref == 0) { - pr_warn("Subscriber rejected, unable to create port\n"); - kfree(subscriber); - return; - } - tipc_connect(subscriber->port_ref, orig); - - /* Lock server port (& save lock address for future use) */ - subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock; - - /* Add subscriber to topology server's subscriber list */ - spin_lock_bh(&topsrv.lock); - list_add(&subscriber->subscriber_list, &topsrv.subscriber_list); - spin_unlock_bh(&topsrv.lock); - - /* Unlock server port */ - server_port_ref = subscriber->port_ref; - spin_unlock_bh(subscriber->lock); - - /* Send an ACK- to complete connection handshaking */ - tipc_send(server_port_ref, 0, NULL, 0); + subscriber->conid = conid; + spin_lock_init(&subscriber->lock); - /* Handle optional subscription request */ - if (size != 0) { - subscr_conn_msg_event(subscriber, server_port_ref, - buf, data, size); - } + return (void *)subscriber; } int tipc_subscr_start(void) { - struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV}; - int res; - - spin_lock_init(&topsrv.lock); - INIT_LIST_HEAD(&topsrv.subscriber_list); - - res = tipc_createport(NULL, - TIPC_CRITICAL_IMPORTANCE, - NULL, - NULL, - NULL, - NULL, - subscr_named_msg_event, - NULL, - NULL, - &topsrv.setup_port); - if (res) - goto failed; - - res = tipc_publish(topsrv.setup_port, TIPC_NODE_SCOPE, &seq); - if (res) { - tipc_deleteport(topsrv.setup_port); - topsrv.setup_port = 0; - goto failed; - } - - return 0; - -failed: - pr_err("Failed to create subscription service\n"); - return res; + return tipc_server_start(&topsrv); } void tipc_subscr_stop(void) { - struct tipc_subscriber *subscriber; - struct tipc_subscriber *subscriber_temp; - spinlock_t *subscriber_lock; - - if (topsrv.setup_port) { - tipc_deleteport(topsrv.setup_port); - topsrv.setup_port = 0; - - list_for_each_entry_safe(subscriber, subscriber_temp, - &topsrv.subscriber_list, - subscriber_list) { - subscriber_lock = subscriber->lock; - spin_lock_bh(subscriber_lock); - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); - } - } + tipc_server_stop(&topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 218d2e07f0cc..393e417bee3f 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -2,7 +2,7 @@ * net/tipc/subscr.h: Include file for TIPC network topology service * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2012-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,10 +37,14 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H +#include "server.h" + struct tipc_subscription; +struct tipc_subscriber; /** * struct tipc_subscription - TIPC network topology subscription object + * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription @@ -52,28 +56,23 @@ struct tipc_subscription; * @evt: template for events generated by subscription */ struct tipc_subscription { + struct tipc_subscriber *subscriber; struct tipc_name_seq seq; u32 timeout; u32 filter; struct timer_list timer; struct list_head nameseq_list; struct list_head subscription_list; - u32 server_ref; int swap; struct tipc_event evt; }; -int tipc_subscr_overlap(struct tipc_subscription *sub, - u32 found_lower, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); -void tipc_subscr_report_overlap(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, - u32 node, - int must_report); +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must); int tipc_subscr_start(void); diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c new file mode 100644 index 000000000000..f3fef93325a8 --- /dev/null +++ b/net/tipc/sysctl.c @@ -0,0 +1,64 @@ +/* + * net/tipc/sysctl.c: sysctl interface to TIPC subsystem + * + * Copyright (c) 2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" + +#include <linux/sysctl.h> + +static struct ctl_table_header *tipc_ctl_hdr; + +static struct ctl_table tipc_table[] = { + { + .procname = "tipc_rmem", + .data = &sysctl_tipc_rmem, + .maxlen = sizeof(sysctl_tipc_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +int tipc_register_sysctl(void) +{ + tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table); + if (tipc_ctl_hdr == NULL) + return -ENOMEM; + return 0; +} + +void tipc_unregister_sysctl(void) +{ + unregister_net_sysctl_table(tipc_ctl_hdr); +} diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 8800604c93f4..b3d515021b74 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -15,7 +15,7 @@ #include <net/af_unix.h> -static ctl_table unix_table[] = { +static struct ctl_table unix_table[] = { { .procname = "max_dgram_qlen", .data = &init_net.unx.sysctl_max_dgram_qlen, diff --git a/net/wireless/core.c b/net/wireless/core.c index 73405e00c800..e4df77490229 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -34,13 +34,12 @@ MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); +MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME); -/* RCU-protected (and cfg80211_mutex for writers) */ +/* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg80211_rdev_list); int cfg80211_rdev_list_generation; -DEFINE_MUTEX(cfg80211_mutex); - /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; @@ -52,12 +51,11 @@ module_param(cfg80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(cfg80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); -/* requires cfg80211_mutex to be held! */ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (rdev->wiphy_idx == wiphy_idx) { @@ -76,12 +74,11 @@ int get_wiphy_idx(struct wiphy *wiphy) return rdev->wiphy_idx; } -/* requires cfg80211_rdev_mutex to be held! */ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); if (!rdev) @@ -89,35 +86,13 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) return &rdev->wiphy; } -struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) -{ - struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); - struct net_device *dev; - - mutex_lock(&cfg80211_mutex); - dev = dev_get_by_index(net, ifindex); - if (!dev) - goto out; - if (dev->ieee80211_ptr) { - rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else - rdev = ERR_PTR(-ENODEV); - dev_put(dev); - out: - mutex_unlock(&cfg80211_mutex); - return rdev; -} - -/* requires cfg80211_mutex to be held */ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { struct cfg80211_registered_device *rdev2; int wiphy_idx, taken = -1, result, digits; - assert_cfg80211_lock(); + ASSERT_RTNL(); /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); @@ -215,8 +190,7 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { - lockdep_assert_held(&rdev->devlist_mtx); - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; @@ -230,18 +204,15 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - bool busy = work_busy(&rdev->scan_done_wk); - /* - * If the work isn't pending or running (in which case it would - * be waiting for the lock we hold) the driver didn't properly - * cancel the scan when the interface was removed. In this case - * warn and leak the scan request object to not crash later. + * If the scan request wasn't notified as done, set it + * to aborted and leak it after a warning. The driver + * should have notified us that it ended at the latest + * during rdev_stop_p2p_device(). */ - WARN_ON(!busy); - - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, !busy); + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !rdev->scan_req->notified); } } @@ -255,8 +226,6 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) rtnl_lock(); - /* read-only iteration need not hold the devlist_mtx */ - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { dev_close(wdev->netdev); @@ -265,12 +234,7 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - /* but this requires it */ - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); cfg80211_stop_p2p_device(rdev, wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); break; default: break; @@ -298,10 +262,7 @@ static void cfg80211_event_work(struct work_struct *work) event_work); rtnl_lock(); - cfg80211_lock_rdev(rdev); - cfg80211_process_rdev_events(rdev); - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -309,7 +270,7 @@ static void cfg80211_event_work(struct work_struct *work) struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) { - static int wiphy_counter; + static atomic_t wiphy_counter = ATOMIC_INIT(0); struct cfg80211_registered_device *rdev; int alloc_size; @@ -331,26 +292,18 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->ops = ops; - mutex_lock(&cfg80211_mutex); - - rdev->wiphy_idx = wiphy_counter++; + rdev->wiphy_idx = atomic_inc_return(&wiphy_counter); if (unlikely(rdev->wiphy_idx < 0)) { - wiphy_counter--; - mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! */ + atomic_dec(&wiphy_counter); kfree(rdev); return NULL; } - mutex_unlock(&cfg80211_mutex); - /* give it a proper name */ dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); - mutex_init(&rdev->mtx); - mutex_init(&rdev->devlist_mtx); - mutex_init(&rdev->sched_scan_mtx); INIT_LIST_HEAD(&rdev->wdev_list); INIT_LIST_HEAD(&rdev->beacon_registrations); spin_lock_init(&rdev->beacon_registrations_lock); @@ -598,11 +551,11 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - mutex_lock(&cfg80211_mutex); + rtnl_lock(); res = device_add(&rdev->wiphy.dev); if (res) { - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); return res; } @@ -631,25 +584,18 @@ int wiphy_register(struct wiphy *wiphy) } cfg80211_debugfs_rdev_add(rdev); - mutex_unlock(&cfg80211_mutex); - /* - * due to a locking dependency this has to be outside of the - * cfg80211_mutex lock - */ res = rfkill_register(rdev->rfkill); if (res) { device_del(&rdev->wiphy.dev); - mutex_lock(&cfg80211_mutex); debugfs_remove_recursive(rdev->wiphy.debugfsdir); list_del_rcu(&rdev->list); wiphy_regulatory_deregister(wiphy); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); return res; } - rtnl_lock(); rdev->wiphy.registered = true; rtnl_unlock(); return 0; @@ -679,25 +625,19 @@ void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - rtnl_lock(); - rdev->wiphy.registered = false; - rtnl_unlock(); - - rfkill_unregister(rdev->rfkill); - - /* protect the device list */ - mutex_lock(&cfg80211_mutex); - wait_event(rdev->dev_wait, ({ int __count; - mutex_lock(&rdev->devlist_mtx); + rtnl_lock(); __count = rdev->opencount; - mutex_unlock(&rdev->devlist_mtx); + rtnl_unlock(); __count == 0; })); - mutex_lock(&rdev->devlist_mtx); + rtnl_lock(); + rdev->wiphy.registered = false; + + rfkill_unregister(rdev->rfkill); + BUG_ON(!list_empty(&rdev->wdev_list)); - mutex_unlock(&rdev->devlist_mtx); /* * First remove the hardware from everywhere, this makes @@ -708,20 +648,6 @@ void wiphy_unregister(struct wiphy *wiphy) synchronize_rcu(); /* - * Try to grab rdev->mtx. If a command is still in progress, - * hopefully the driver will refuse it since it's tearing - * down the device already. We wait for this command to complete - * before unlinking the item from the list. - * Note: as codified by the BUG_ON above we cannot get here if - * a virtual interface is still present. Hence, we can only get - * to lock contention here if userspace issues a command that - * identified the hardware by wiphy index. - */ - cfg80211_lock_rdev(rdev); - /* nothing */ - cfg80211_unlock_rdev(rdev); - - /* * If this device got a regulatory hint tell core its * free to listen now to a new shiny device regulatory hint */ @@ -730,15 +656,17 @@ void wiphy_unregister(struct wiphy *wiphy) cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); - if (rdev->wowlan && rdev->ops->set_wakeup) +#ifdef CONFIG_PM + if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); +#endif cfg80211_rdev_free_wowlan(rdev); } EXPORT_SYMBOL(wiphy_unregister); @@ -748,9 +676,6 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; rfkill_destroy(rdev->rfkill); - mutex_destroy(&rdev->mtx); - mutex_destroy(&rdev->devlist_mtx); - mutex_destroy(&rdev->sched_scan_mtx); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(®->list); kfree(reg); @@ -775,36 +700,6 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); -static void wdev_cleanup_work(struct work_struct *work) -{ - struct wireless_dev *wdev; - struct cfg80211_registered_device *rdev; - - wdev = container_of(work, struct wireless_dev, cleanup_work); - rdev = wiphy_to_dev(wdev->wiphy); - - mutex_lock(&rdev->sched_scan_mtx); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } - - if (WARN_ON(rdev->sched_scan_req && - rdev->sched_scan_req->dev == wdev->netdev)) { - __cfg80211_stop_sched_scan(rdev, false); - } - - mutex_unlock(&rdev->sched_scan_mtx); - - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - wake_up(&rdev->dev_wait); - - dev_put(wdev->netdev); -} - void cfg80211_unregister_wdev(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -814,8 +709,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) if (WARN_ON(wdev->netdev)) return; - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -827,8 +720,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) WARN_ON_ONCE(1); break; } - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -847,7 +738,7 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, } void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; @@ -857,9 +748,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - mutex_lock(&rdev->sched_scan_mtx); __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT @@ -868,8 +757,8 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, wdev->wext.ie_len = 0; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif - __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, true); + cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, true); wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: @@ -886,10 +775,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; int ret; @@ -912,13 +800,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * are added with nl80211. */ mutex_init(&wdev->mtx); - INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - mutex_lock(&rdev->devlist_mtx); wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; @@ -931,7 +817,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } wdev->netdev = dev; wdev->sme_state = CFG80211_SME_IDLE; - mutex_unlock(&rdev->devlist_mtx); #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; @@ -957,26 +842,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); - dev_hold(dev); - queue_work(cfg80211_wq, &wdev->cleanup_work); + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, true); + } + + if (WARN_ON(rdev->sched_scan_req && + rdev->sched_scan_req->dev == wdev->netdev)) { + __cfg80211_stop_sched_scan(rdev, false); + } + + rdev->opencount--; + wake_up(&rdev->dev_wait); break; case NETDEV_UP: - /* - * If we have a really quick DOWN/UP succession we may - * have this work still pending ... cancel it and see - * if it was pending, in which case we need to account - * for some of the work it would have done. - */ - if (cancel_work_sync(&wdev->cleanup_work)) { - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - dev_put(dev); - } cfg80211_update_iface_num(rdev, wdev->iftype, 1); - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -1008,10 +889,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); /* * Configure power management to the driver here so that its @@ -1028,12 +906,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_UNREGISTER: /* - * NB: cannot take rdev->mtx here because this may be - * called within code protected by it when interfaces - * are removed with nl80211. - */ - mutex_lock(&rdev->devlist_mtx); - /* * It is possible to get NETDEV_UNREGISTER * multiple times. To detect that, check * that the interface is still on the list @@ -1049,7 +921,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, kfree(wdev->wext.keys); #endif } - mutex_unlock(&rdev->devlist_mtx); /* * synchronise (so that we won't find this netdev * from other code any more) and then clear the list @@ -1069,9 +940,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->rfkill)) return notifier_from_errno(-ERFKILL); - mutex_lock(&rdev->devlist_mtx); ret = cfg80211_can_add_interface(rdev, wdev->iftype); - mutex_unlock(&rdev->devlist_mtx); if (ret) return notifier_from_errno(ret); break; @@ -1089,12 +958,10 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) struct cfg80211_registered_device *rdev; rtnl_lock(); - mutex_lock(&cfg80211_mutex); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (net_eq(wiphy_net(&rdev->wiphy), net)) WARN_ON(cfg80211_switch_netns(rdev, &init_net)); } - mutex_unlock(&cfg80211_mutex); rtnl_unlock(); } diff --git a/net/wireless/core.h b/net/wireless/core.h index fd35dae547c4..a65eaf8a84c1 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -5,7 +5,6 @@ */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H -#include <linux/mutex.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/rbtree.h> @@ -23,11 +22,6 @@ struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; - /* we hold this mutex during any call so that - * we cannot do multiple calls at once, and also - * to avoid the deregister call to proceed while - * any call is in progress */ - struct mutex mtx; /* rfkill support */ struct rfkill_ops rfkill_ops; @@ -49,9 +43,7 @@ struct cfg80211_registered_device { /* wiphy index, internal only */ int wiphy_idx; - /* associated wireless interfaces */ - struct mutex devlist_mtx; - /* protected by devlist_mtx or RCU */ + /* associated wireless interfaces, protected by rtnl or RCU */ struct list_head wdev_list; int devlist_generation, wdev_id; int opencount; /* also protected by devlist_mtx */ @@ -75,8 +67,6 @@ struct cfg80211_registered_device { struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; - struct mutex sched_scan_mtx; - #ifdef CONFIG_NL80211_TESTMODE struct genl_info *testmode_info; #endif @@ -84,8 +74,6 @@ struct cfg80211_registered_device { struct work_struct conn_work; struct work_struct event_work; - struct cfg80211_wowlan *wowlan; - struct delayed_work dfs_update_channels_wk; /* netlink port which started critical protocol (0 means not started) */ @@ -106,29 +94,26 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { +#ifdef CONFIG_PM int i; - if (!rdev->wowlan) + if (!rdev->wiphy.wowlan_config) return; - for (i = 0; i < rdev->wowlan->n_patterns; i++) - kfree(rdev->wowlan->patterns[i].mask); - kfree(rdev->wowlan->patterns); - if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) - sock_release(rdev->wowlan->tcp->sock); - kfree(rdev->wowlan->tcp); - kfree(rdev->wowlan); + for (i = 0; i < rdev->wiphy.wowlan_config->n_patterns; i++) + kfree(rdev->wiphy.wowlan_config->patterns[i].mask); + kfree(rdev->wiphy.wowlan_config->patterns); + if (rdev->wiphy.wowlan_config->tcp && + rdev->wiphy.wowlan_config->tcp->sock) + sock_release(rdev->wiphy.wowlan_config->tcp->sock); + kfree(rdev->wiphy.wowlan_config->tcp); + kfree(rdev->wiphy.wowlan_config); +#endif } extern struct workqueue_struct *cfg80211_wq; -extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; -static inline void assert_cfg80211_lock(void) -{ - lockdep_assert_held(&cfg80211_mutex); -} - struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; @@ -161,27 +146,11 @@ static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss) struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); -/* requires cfg80211_rdev_mutex to be held! */ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); -/* identical to cfg80211_get_dev_from_info but only operate on ifindex */ -extern struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(struct net *net, int ifindex); - int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); -static inline void cfg80211_lock_rdev(struct cfg80211_registered_device *rdev) -{ - mutex_lock(&rdev->mtx); -} - -static inline void cfg80211_unlock_rdev(struct cfg80211_registered_device *rdev) -{ - BUG_ON(IS_ERR(rdev) || !rdev); - mutex_unlock(&rdev->mtx); -} - static inline void wdev_lock(struct wireless_dev *wdev) __acquires(wdev) { @@ -196,7 +165,7 @@ static inline void wdev_unlock(struct wireless_dev *wdev) mutex_unlock(&wdev->mtx); } -#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx) +#define ASSERT_RDEV_LOCK(rdev) ASSERT_RTNL() #define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) @@ -314,38 +283,21 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev); /* MLME */ -int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, - const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len); int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, const u8 *sae_data, int sae_data_len); -int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *ssid, int ssid_len, struct cfg80211_assoc_request *req); -int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, @@ -377,18 +329,11 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); /* SME */ -int __cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys, - const u8 *prev_bssid); int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys); -int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, - bool wextev); + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 920cabe0461b..90d050036624 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -74,7 +74,7 @@ static ssize_t ht40allow_map_read(struct file *file, if (!buf) return -ENOMEM; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { sband = wiphy->bands[band]; @@ -85,7 +85,7 @@ static ssize_t ht40allow_map_read(struct file *file, buf, buf_size, offset); } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); r = simple_read_from_buffer(user_buf, count, ppos, buf, offset); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index d80e47194d49..5449c5a6de84 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -152,11 +152,11 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); + ASSERT_RTNL(); + wdev_lock(wdev); err = __cfg80211_join_ibss(rdev, dev, params, connkeys); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -359,11 +359,9 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev->wext.ibss.channel_fixed = false; } - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -429,11 +427,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -512,11 +508,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } else wdev->wext.ibss.bssid = NULL; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0bb93f3061a4..5dfb289ab761 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -82,6 +82,7 @@ const struct mesh_setup default_mesh_setup = { .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, + .auth_id = 0, /* open */ .ie = NULL, .ie_len = 0, .is_secure = false, @@ -185,11 +186,9 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = __cfg80211_join_mesh(rdev, dev, setup, conf); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 0c7b7dd855f6..7bde5d9c0003 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -25,12 +25,9 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_rx_auth(dev); - wdev_lock(wdev); nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); cfg80211_sme_rx_auth(dev, buf, len); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_auth); @@ -46,7 +43,6 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); trace_cfg80211_send_rx_assoc(dev, bss); - wdev_lock(wdev); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); @@ -59,7 +55,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && cfg80211_sme_failed_reassoc(wdev)) { cfg80211_put_bss(wiphy, bss); - goto out; + return; } nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); @@ -71,7 +67,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, * sme will schedule work that does it later. */ cfg80211_put_bss(wiphy, bss); - goto out; + return; } if (!wdev->conn && wdev->sme_state == CFG80211_SME_IDLE) { @@ -87,13 +83,11 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, status_code, status_code == WLAN_STATUS_SUCCESS, bss); - out: - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -void __cfg80211_send_deauth(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_send_deauth(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -102,7 +96,7 @@ void __cfg80211_send_deauth(struct net_device *dev, const u8 *bssid = mgmt->bssid; bool was_current = false; - trace___cfg80211_send_deauth(dev); + trace_cfg80211_send_deauth(dev); ASSERT_WDEV_LOCK(wdev); if (wdev->current_bss && @@ -129,20 +123,10 @@ void __cfg80211_send_deauth(struct net_device *dev, false, NULL); } } -EXPORT_SYMBOL(__cfg80211_send_deauth); - -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_send_deauth(dev, buf, len); - wdev_unlock(wdev); -} EXPORT_SYMBOL(cfg80211_send_deauth); -void __cfg80211_send_disassoc(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_send_disassoc(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -152,7 +136,7 @@ void __cfg80211_send_disassoc(struct net_device *dev, u16 reason_code; bool from_ap; - trace___cfg80211_send_disassoc(dev); + trace_cfg80211_send_disassoc(dev); ASSERT_WDEV_LOCK(wdev); nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); @@ -175,16 +159,6 @@ void __cfg80211_send_disassoc(struct net_device *dev, from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); } -EXPORT_SYMBOL(__cfg80211_send_disassoc); - -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_send_disassoc(dev, buf, len); - wdev_unlock(wdev); -} EXPORT_SYMBOL(cfg80211_send_disassoc); void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) @@ -194,15 +168,12 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_auth_timeout(dev, addr); - wdev_lock(wdev); nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_auth_timeout); @@ -213,15 +184,12 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_assoc_timeout(dev, addr); - wdev_lock(wdev); nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_assoc_timeout); @@ -253,18 +221,27 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, EXPORT_SYMBOL(cfg80211_michael_mic_failure); /* some MLME handling for userspace SME */ -int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, - const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) +int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx, + const u8 *sae_data, int sae_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_auth_request req; + struct cfg80211_auth_request req = { + .ie = ie, + .ie_len = ie_len, + .sae_data = sae_data, + .sae_data_len = sae_data_len, + .auth_type = auth_type, + .key = key, + .key_len = key_len, + .key_idx = key_idx, + }; int err; ASSERT_WDEV_LOCK(wdev); @@ -277,18 +254,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ether_addr_equal(bssid, wdev->current_bss->pub.bssid)) return -EALREADY; - memset(&req, 0, sizeof(req)); - - req.ie = ie; - req.ie_len = ie_len; - req.sae_data = sae_data; - req.sae_data_len = sae_data_len; - req.auth_type = auth_type; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - req.key = key; - req.key_len = key_len; - req.key_idx = key_idx; if (!req.bss) return -ENOENT; @@ -304,28 +271,6 @@ out: return err; } -int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) -{ - int err; - - mutex_lock(&rdev->devlist_mtx); - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len, - key, key_len, key_idx, - sae_data, sae_data_len); - wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->devlist_mtx); - - return err; -} - /* Do a logical ht_capa &= ht_capa_mask. */ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask) @@ -360,12 +305,12 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, p1[i] &= p2[i]; } -int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req) +int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, + const u8 *ssid, int ssid_len, + struct cfg80211_assoc_request *req) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -415,30 +360,10 @@ out: return err; } -int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - mutex_lock(&rdev->devlist_mtx); - wdev_lock(wdev); - err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, - ssid, ssid_len, req); - wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); - - return err; -} - -int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) +int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_deauth_request req = { @@ -458,29 +383,18 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, return rdev_deauth(rdev, dev, &req); } -int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason, - local_state_change); - wdev_unlock(wdev); - - return err; -} - -static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) +int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_disassoc_request req; + struct cfg80211_disassoc_request req = { + .reason_code = reason, + .local_state_change = local_state_change, + .ie = ie, + .ie_len = ie_len, + }; ASSERT_WDEV_LOCK(wdev); @@ -490,11 +404,6 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) return -ENOTCONN; - memset(&req, 0, sizeof(req)); - req.reason_code = reason; - req.local_state_change = local_state_change; - req.ie = ie; - req.ie_len = ie_len; if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) req.bss = &wdev->current_bss->pub; else @@ -503,44 +412,25 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, return rdev_disassoc(rdev, dev, &req); } -int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason, - local_state_change); - wdev_unlock(wdev); - - return err; -} - void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_deauth_request req; u8 bssid[ETH_ALEN]; + struct cfg80211_deauth_request req = { + .reason_code = WLAN_REASON_DEAUTH_LEAVING, + .bssid = bssid, + }; ASSERT_WDEV_LOCK(wdev); if (!rdev->ops->deauth) return; - memset(&req, 0, sizeof(req)); - req.reason_code = WLAN_REASON_DEAUTH_LEAVING; - req.ie = NULL; - req.ie_len = 0; - if (!wdev->current_bss) return; memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); - req.bssid = bssid; rdev_deauth(rdev, dev, &req); if (wdev->current_bss) { @@ -848,7 +738,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) dfs_update_channels_wk); wiphy = &rdev->wiphy; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { sband = wiphy->bands[bandid]; if (!sband) @@ -881,7 +771,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) check_again = true; } } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); /* reschedule if there are other channels waiting to be cleared again */ if (check_again) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b14b7e3cb6e6..ea74b9dd9d82 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -37,10 +37,10 @@ static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = "nl80211", /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ + .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, .netnsok = true, .pre_doit = nl80211_pre_doit, @@ -59,7 +59,7 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) int wiphy_idx = -1; int ifidx = -1; - assert_cfg80211_lock(); + ASSERT_RTNL(); if (!have_ifidx && !have_wdev_id) return ERR_PTR(-EINVAL); @@ -80,7 +80,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) continue; - mutex_lock(&rdev->devlist_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { @@ -92,7 +91,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) break; } } - mutex_unlock(&rdev->devlist_mtx); if (result) break; @@ -109,7 +107,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) struct cfg80211_registered_device *rdev = NULL, *tmp; struct net_device *netdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); if (!attrs[NL80211_ATTR_WIPHY] && !attrs[NL80211_ATTR_IFINDEX] && @@ -128,14 +126,12 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); if (tmp) { /* make sure wdev exists */ - mutex_lock(&tmp->devlist_mtx); list_for_each_entry(wdev, &tmp->wdev_list, list) { if (wdev->identifier != (u32)wdev_id) continue; found = true; break; } - mutex_unlock(&tmp->devlist_mtx); if (!found) tmp = NULL; @@ -182,19 +178,6 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. - * If successful, it returns non-NULL and also locks - * the driver's mutex! - * - * This means that you need to call cfg80211_unlock_rdev() - * before being allowed to acquire &cfg80211_mutex! - * - * This is necessary because we need to lock the global - * mutex to get an item off the list safely, and then - * we lock the rdev mutex so it doesn't go away under us. - * - * We don't want to keep cfg80211_mutex locked - * for all the time in order to allow requests on - * other interfaces to go through at the same time. * * The result of this can be a PTR_ERR and hence must * be checked with IS_ERR() for errors. @@ -202,20 +185,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) static struct cfg80211_registered_device * cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) { - struct cfg80211_registered_device *rdev; - - mutex_lock(&cfg80211_mutex); - rdev = __cfg80211_rdev_from_attrs(netns, info->attrs); - - /* if it is not an error we grab the lock on - * it to assure it won't be going away while - * we operate on it */ - if (!IS_ERR(rdev)) - mutex_lock(&rdev->mtx); - - mutex_unlock(&cfg80211_mutex); - - return rdev; + return __cfg80211_rdev_from_attrs(netns, info->attrs); } /* policy for the attributes */ @@ -378,6 +348,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -455,7 +426,6 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, int err; rtnl_lock(); - mutex_lock(&cfg80211_mutex); if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, @@ -484,14 +454,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, *rdev = wiphy_to_dev(wiphy); *wdev = NULL; - mutex_lock(&(*rdev)->devlist_mtx); list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { if (tmp->identifier == cb->args[1]) { *wdev = tmp; break; } } - mutex_unlock(&(*rdev)->devlist_mtx); if (!*wdev) { err = -ENODEV; @@ -499,19 +467,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - cfg80211_lock_rdev(*rdev); - - mutex_unlock(&cfg80211_mutex); return 0; out_unlock: - mutex_unlock(&cfg80211_mutex); rtnl_unlock(); return err; } static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) { - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -1572,7 +1535,8 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) if (!tb) return -ENOMEM; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); + res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, NL80211_ATTR_MAX, nl80211_policy); if (res == 0) { @@ -1587,7 +1551,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) netdev = dev_get_by_index(sock_net(skb->sk), ifidx); if (!netdev) { - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); kfree(tb); return -ENODEV; } @@ -1635,7 +1599,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) !skb->len && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); return 1; } idx--; @@ -1644,7 +1608,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) } while (cb->args[1] > 0); break; } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); cb->args[0] = idx; @@ -1799,7 +1763,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, if (result) return result; - mutex_lock(&rdev->devlist_mtx); switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: @@ -1823,7 +1786,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, default: result = -EINVAL; } - mutex_unlock(&rdev->devlist_mtx); return result; } @@ -1872,6 +1834,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 frag_threshold = 0, rts_threshold = 0; u8 coverage_class = 0; + ASSERT_RTNL(); + /* * Try to find the wiphy and netdev. Normally this * function shouldn't need the netdev, but this is @@ -1881,31 +1845,25 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) * also passed a netdev to set_wiphy, so that it is * possible to let that go to the right netdev! */ - mutex_lock(&cfg80211_mutex); if (info->attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(genl_info_net(info), ifindex); - if (netdev && netdev->ieee80211_ptr) { + if (netdev && netdev->ieee80211_ptr) rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else + else netdev = NULL; } if (!netdev) { rdev = __cfg80211_rdev_from_attrs(genl_info_net(info), info->attrs); - if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); + if (IS_ERR(rdev)) return PTR_ERR(rdev); - } wdev = NULL; netdev = NULL; result = 0; - - mutex_lock(&rdev->mtx); } else wdev = netdev->ieee80211_ptr; @@ -1918,8 +1876,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = cfg80211_dev_rename( rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); - mutex_unlock(&cfg80211_mutex); - if (result) goto bad_res; @@ -2126,7 +2082,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } bad_res: - mutex_unlock(&rdev->mtx); if (netdev) dev_put(netdev); return result; @@ -2224,7 +2179,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; @@ -2234,7 +2189,6 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * } if_idx = 0; - mutex_lock(&rdev->devlist_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (if_idx < if_start) { if_idx++; @@ -2243,17 +2197,15 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { - mutex_unlock(&rdev->devlist_mtx); goto out; } if_idx++; } - mutex_unlock(&rdev->devlist_mtx); wp_idx++; } out: - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); cb->args[0] = wp_idx; cb->args[1] = if_idx; @@ -2286,6 +2238,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { [NL80211_MNTR_FLAG_CONTROL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, + [NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG }, }; static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) @@ -2397,6 +2350,10 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } + if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + if (change) err = cfg80211_change_iface(rdev, dev, ntype, flags, ¶ms); else @@ -2454,6 +2411,11 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); + + if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); @@ -2486,11 +2448,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - mutex_lock(&rdev->devlist_mtx); wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; - mutex_unlock(&rdev->devlist_mtx); break; default: break; @@ -2999,8 +2959,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; bool ret = false; - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) @@ -3014,8 +2972,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, break; } - mutex_unlock(&rdev->devlist_mtx); - return ret; } @@ -3177,13 +3133,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.radar_required = true; } - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, params.chandef.chan, CHAN_MODE_SHARED, radar_detect_width); - mutex_unlock(&rdev->devlist_mtx); - if (err) return err; @@ -3383,6 +3336,32 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, return true; } +static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, + int id) +{ + void *attr; + int i = 0; + + if (!mask) + return true; + + attr = nla_nest_start(msg, id); + if (!attr) + return false; + + for (i = 0; i < IEEE80211_MAX_CHAINS; i++) { + if (!(mask & BIT(i))) + continue; + + if (nla_put_u8(msg, i, signal[i])) + return false; + } + + nla_nest_end(msg, attr); + + return true; +} + static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -3454,6 +3433,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, default: break; } + if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) { + if (!nl80211_put_signal(msg, sinfo->chains, + sinfo->chain_signal, + NL80211_STA_INFO_CHAIN_SIGNAL)) + goto nla_put_failure; + } + if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) { + if (!nl80211_put_signal(msg, sinfo->chains, + sinfo->chain_signal_avg, + NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) + goto nla_put_failure; + } if (sinfo->filled & STATION_INFO_TX_BITRATE) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, NL80211_STA_INFO_TX_BITRATE)) @@ -3841,6 +3832,8 @@ static int nl80211_set_station_tdls(struct genl_info *info, struct station_parameters *params) { /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_PEER_AID]) + params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params->ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); @@ -3981,7 +3974,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) + if (!info->attrs[NL80211_ATTR_STA_AID] && + !info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -3992,7 +3986,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (info->attrs[NL80211_ATTR_STA_AID]) + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + else + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; @@ -4641,6 +4638,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, @@ -4826,6 +4824,13 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, if (setup->is_secure) setup->user_mpm = true; + if (tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]) { + if (!setup->user_mpm) + return -EINVAL; + setup->auth_id = + nla_get_u8(tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]); + } + return 0; } @@ -4868,18 +4873,13 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) void *hdr = NULL; struct nlattr *nl_reg_rules; unsigned int i; - int err = -EINVAL; - - mutex_lock(&cfg80211_mutex); if (!cfg80211_regdomain) - goto out; + return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - err = -ENOBUFS; - goto out; - } + if (!msg) + return -ENOBUFS; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); @@ -4938,8 +4938,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_nest_end(msg, nl_reg_rules); genlmsg_end(msg, hdr); - err = genlmsg_reply(msg, info); - goto out; + return genlmsg_reply(msg, info); nla_put_failure_rcu: rcu_read_unlock(); @@ -4947,10 +4946,7 @@ nla_put_failure: genlmsg_cancel(msg, hdr); put_failure: nlmsg_free(msg); - err = -EMSGSIZE; -out: - mutex_unlock(&cfg80211_mutex); - return err; + return -EMSGSIZE; } static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) @@ -5016,12 +5012,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - mutex_lock(&cfg80211_mutex); - r = set_regdom(rd); /* set_regdom took ownership */ rd = NULL; - mutex_unlock(&cfg80211_mutex); bad_reg: kfree(rd); @@ -5071,7 +5064,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto unlock; @@ -5257,7 +5249,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } unlock: - mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -5329,8 +5320,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (ie_len > wiphy->max_sched_scan_ie_len) return -EINVAL; - mutex_lock(&rdev->sched_scan_mtx); - if (rdev->sched_scan_req) { err = -EINPROGRESS; goto out; @@ -5498,7 +5487,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, out_free: kfree(request); out: - mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -5506,17 +5494,12 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || !rdev->ops->sched_scan_stop) return -EOPNOTSUPP; - mutex_lock(&rdev->sched_scan_mtx); - err = __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); - - return err; + return __cfg80211_stop_sched_scan(rdev, false); } static int nl80211_start_radar_detection(struct sk_buff *skb, @@ -5548,12 +5531,11 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, chandef.chan, CHAN_MODE_SHARED, BIT(chandef.width)); if (err) - goto err_locked; + return err; err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); if (!err) { @@ -5561,9 +5543,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->cac_started = true; wdev->cac_start_time = jiffies; } -err_locked: - mutex_unlock(&rdev->devlist_mtx); - return err; } @@ -5946,10 +5925,13 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (local_state_change) return 0; - return cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len, - key.p.key, key.p.key_len, key.idx, - sae_data, sae_data_len); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, + ssid, ssid_len, ie, ie_len, + key.p.key, key.p.key_len, key.idx, + sae_data, sae_data_len); + wdev_unlock(dev->ieee80211_ptr); + return err; } static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, @@ -6116,9 +6098,12 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); - if (!err) + if (!err) { + wdev_lock(dev->ieee80211_ptr); err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, ssid_len, &req); + wdev_unlock(dev->ieee80211_ptr); + } return err; } @@ -6128,7 +6113,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0; + int ie_len = 0, err; u16 reason_code; bool local_state_change; @@ -6163,8 +6148,11 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); + wdev_unlock(dev->ieee80211_ptr); + return err; } static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) @@ -6172,7 +6160,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0; + int ie_len = 0, err; u16 reason_code; bool local_state_change; @@ -6207,8 +6195,11 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); + wdev_unlock(dev->ieee80211_ptr); + return err; } static bool @@ -6426,6 +6417,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, void *data = NULL; int data_len = 0; + rtnl_lock(); + if (cb->args[0]) { /* * 0 is a valid index, but not valid for args[0], @@ -6437,18 +6430,16 @@ static int nl80211_testmode_dump(struct sk_buff *skb, nl80211_fam.attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - return err; + goto out_err; - mutex_lock(&cfg80211_mutex); rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); - return PTR_ERR(rdev); + err = PTR_ERR(rdev); + goto out_err; } phy_idx = rdev->wiphy_idx; rdev = NULL; - mutex_unlock(&cfg80211_mutex); if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) cb->args[1] = @@ -6460,14 +6451,11 @@ static int nl80211_testmode_dump(struct sk_buff *skb, data_len = nla_len((void *)cb->args[1]); } - mutex_lock(&cfg80211_mutex); rdev = cfg80211_rdev_by_wiphy_idx(phy_idx); if (!rdev) { - mutex_unlock(&cfg80211_mutex); - return -ENOENT; + err = -ENOENT; + goto out_err; } - cfg80211_lock_rdev(rdev); - mutex_unlock(&cfg80211_mutex); if (!rdev->ops->testmode_dump) { err = -EOPNOTSUPP; @@ -6508,7 +6496,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, /* see above */ cb->args[0] = phy_idx + 1; out_err: - cfg80211_unlock_rdev(rdev); + rtnl_unlock(); return err; } @@ -6716,7 +6704,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.vht_capa)); } - err = cfg80211_connect(rdev, dev, &connect, connkeys); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL); + wdev_unlock(dev->ieee80211_ptr); if (err) kfree(connkeys); return err; @@ -6727,6 +6717,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u16 reason; + int ret; if (!info->attrs[NL80211_ATTR_REASON_CODE]) reason = WLAN_REASON_DEAUTH_LEAVING; @@ -6740,7 +6731,10 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - return cfg80211_disconnect(rdev, dev, reason, true); + wdev_lock(dev->ieee80211_ptr); + ret = cfg80211_disconnect(rdev, dev, reason, true); + wdev_unlock(dev->ieee80211_ptr); + return ret; } static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) @@ -7516,28 +7510,29 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) static int nl80211_send_wowlan_patterns(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { + struct cfg80211_wowlan *wowlan = rdev->wiphy.wowlan_config; struct nlattr *nl_pats, *nl_pat; int i, pat_len; - if (!rdev->wowlan->n_patterns) + if (!wowlan->n_patterns) return 0; nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; - for (i = 0; i < rdev->wowlan->n_patterns; i++) { + for (i = 0; i < wowlan->n_patterns; i++) { nl_pat = nla_nest_start(msg, i + 1); if (!nl_pat) return -ENOBUFS; - pat_len = rdev->wowlan->patterns[i].pattern_len; + pat_len = wowlan->patterns[i].pattern_len; if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || + wowlan->patterns[i].mask) || nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, rdev->wowlan->patterns[i].pattern) || + pat_len, wowlan->patterns[i].pattern) || nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, - rdev->wowlan->patterns[i].pkt_offset)) + wowlan->patterns[i].pkt_offset)) return -ENOBUFS; nla_nest_end(msg, nl_pat); } @@ -7600,12 +7595,12 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - if (rdev->wowlan && rdev->wowlan->tcp) { + if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) { /* adjust size to have room for all the data */ - size += rdev->wowlan->tcp->tokens_size + - rdev->wowlan->tcp->payload_len + - rdev->wowlan->tcp->wake_len + - rdev->wowlan->tcp->wake_len / 8; + size += rdev->wiphy.wowlan_config->tcp->tokens_size + + rdev->wiphy.wowlan_config->tcp->payload_len + + rdev->wiphy.wowlan_config->tcp->wake_len + + rdev->wiphy.wowlan_config->tcp->wake_len / 8; } msg = nlmsg_new(size, GFP_KERNEL); @@ -7617,33 +7612,34 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto nla_put_failure; - if (rdev->wowlan) { + if (rdev->wiphy.wowlan_config) { struct nlattr *nl_wowlan; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); if (!nl_wowlan) goto nla_put_failure; - if ((rdev->wowlan->any && + if ((rdev->wiphy.wowlan_config->any && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - (rdev->wowlan->disconnect && + (rdev->wiphy.wowlan_config->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - (rdev->wowlan->magic_pkt && + (rdev->wiphy.wowlan_config->magic_pkt && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - (rdev->wowlan->gtk_rekey_failure && + (rdev->wiphy.wowlan_config->gtk_rekey_failure && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - (rdev->wowlan->eap_identity_req && + (rdev->wiphy.wowlan_config->eap_identity_req && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - (rdev->wowlan->four_way_handshake && + (rdev->wiphy.wowlan_config->four_way_handshake && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - (rdev->wowlan->rfkill_release && + (rdev->wiphy.wowlan_config->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; - if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + if (nl80211_send_wowlan_tcp(msg, + rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; nla_nest_end(msg, nl_wowlan); @@ -7810,7 +7806,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) struct cfg80211_wowlan *ntrig; struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan; int err, i; - bool prev_enabled = rdev->wowlan; + bool prev_enabled = rdev->wiphy.wowlan_config; if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && !rdev->wiphy.wowlan.tcp) @@ -7818,7 +7814,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { cfg80211_rdev_free_wowlan(rdev); - rdev->wowlan = NULL; + rdev->wiphy.wowlan_config = NULL; goto set_wakeup; } @@ -7954,11 +7950,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; } cfg80211_rdev_free_wowlan(rdev); - rdev->wowlan = ntrig; + rdev->wiphy.wowlan_config = ntrig; set_wakeup: - if (rdev->ops->set_wakeup && prev_enabled != !!rdev->wowlan) - rdev_set_wakeup(rdev, rdev->wowlan); + if (rdev->ops->set_wakeup && + prev_enabled != !!rdev->wiphy.wowlan_config) + rdev_set_wakeup(rdev, rdev->wiphy.wowlan_config); return 0; error: @@ -8143,9 +8140,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->p2p_started) return 0; - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_add_interface(rdev, wdev->iftype); - mutex_unlock(&rdev->devlist_mtx); if (err) return err; @@ -8154,9 +8149,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) return err; wdev->p2p_started = true; - mutex_lock(&rdev->devlist_mtx); rdev->opencount++; - mutex_unlock(&rdev->devlist_mtx); return 0; } @@ -8172,11 +8165,7 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); cfg80211_stop_p2p_device(rdev, wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); return 0; } @@ -8319,11 +8308,11 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV || ops->internal_flags & NL80211_FLAG_NEED_WDEV) { - mutex_lock(&cfg80211_mutex); + ASSERT_RTNL(); + wdev = __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return PTR_ERR(wdev); @@ -8334,7 +8323,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -EINVAL; @@ -8348,7 +8336,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, if (dev) { if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !netif_running(dev)) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; @@ -8357,17 +8344,12 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, dev_hold(dev); } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { if (!wdev->p2p_started) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; } } - cfg80211_lock_rdev(rdev); - - mutex_unlock(&cfg80211_mutex); - info->user_ptr[0] = rdev; } @@ -8377,8 +8359,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - if (info->user_ptr[0]) - cfg80211_unlock_rdev(info->user_ptr[0]); if (info->user_ptr[1]) { if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; @@ -8400,7 +8380,8 @@ static struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_wiphy, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_WIPHY, @@ -8415,7 +8396,8 @@ static struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_interface, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WDEV, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_INTERFACE, @@ -8574,6 +8556,7 @@ static struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_REG, .doit = nl80211_get_reg, .policy = nl80211_policy, + .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ }, { @@ -8581,6 +8564,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_set_reg, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_REQ_SET_REG, @@ -9036,8 +9020,6 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - lockdep_assert_held(&rdev->sched_scan_mtx); - if (WARN_ON(!req)) return 0; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index cc35fbaa4578..e1d6749234c6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -81,7 +81,10 @@ static struct regulatory_request core_request_world = { .country_ie_env = ENVIRON_ANY, }; -/* Receipt of information from last regulatory request */ +/* + * Receipt of information from last regulatory request, + * protected by RTNL (and can be accessed with RCU protection) + */ static struct regulatory_request __rcu *last_request = (void __rcu *)&core_request_world; @@ -96,39 +99,25 @@ static struct device_type reg_device_type = { * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no * information to give us an alpha2. + * (protected by RTNL, can be read under RCU) */ const struct ieee80211_regdomain __rcu *cfg80211_regdomain; /* - * Protects static reg.c components: - * - cfg80211_regdomain (if not used with RCU) - * - cfg80211_world_regdom - * - last_request (if not used with RCU) - * - reg_num_devs_support_basehint - */ -static DEFINE_MUTEX(reg_mutex); - -/* * Number of devices that registered to the core * that support cellular base station regulatory hints + * (protected by RTNL) */ static int reg_num_devs_support_basehint; -static inline void assert_reg_lock(void) -{ - lockdep_assert_held(®_mutex); -} - static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { - return rcu_dereference_protected(cfg80211_regdomain, - lockdep_is_held(®_mutex)); + return rtnl_dereference(cfg80211_regdomain); } static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { - return rcu_dereference_protected(wiphy->regd, - lockdep_is_held(®_mutex)); + return rtnl_dereference(wiphy->regd); } static void rcu_free_regdom(const struct ieee80211_regdomain *r) @@ -140,8 +129,7 @@ static void rcu_free_regdom(const struct ieee80211_regdomain *r) static struct regulatory_request *get_last_request(void) { - return rcu_dereference_check(last_request, - lockdep_is_held(®_mutex)); + return rcu_dereference_rtnl(last_request); } /* Used to queue up regulatory hints */ @@ -200,6 +188,7 @@ static const struct ieee80211_regdomain world_regdom = { } }; +/* protected by RTNL */ static const struct ieee80211_regdomain *cfg80211_world_regdom = &world_regdom; @@ -215,7 +204,7 @@ static void reset_regdomains(bool full_reset, const struct ieee80211_regdomain *r; struct regulatory_request *lr; - assert_reg_lock(); + ASSERT_RTNL(); r = get_cfg80211_regdom(); @@ -377,7 +366,7 @@ static void reg_regdb_search(struct work_struct *work) const struct ieee80211_regdomain *curdom, *regdom = NULL; int i; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -402,7 +391,7 @@ static void reg_regdb_search(struct work_struct *work) if (!IS_ERR_OR_NULL(regdom)) set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); @@ -936,13 +925,7 @@ static bool reg_request_cell_base(struct regulatory_request *request) bool reg_last_request_cell_base(void) { - bool val; - - mutex_lock(®_mutex); - val = reg_request_cell_base(get_last_request()); - mutex_unlock(®_mutex); - - return val; + return reg_request_cell_base(get_last_request()); } #ifdef CONFIG_CFG80211_CERTIFICATION_ONUS @@ -1225,7 +1208,7 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; - assert_cfg80211_lock(); + ASSERT_RTNL(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; @@ -1444,8 +1427,6 @@ static void reg_set_request_processed(void) * what it believes should be the current regulatory domain. * * Returns one of the different reg request treatment values. - * - * Caller must hold ®_mutex */ static enum reg_request_treatment __regulatory_hint(struct wiphy *wiphy, @@ -1570,21 +1551,19 @@ static void reg_process_pending_hints(void) { struct regulatory_request *reg_request, *lr; - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); lr = get_last_request(); /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); - goto out; + return; } spin_lock(®_requests_lock); if (list_empty(®_requests_list)) { spin_unlock(®_requests_lock); - goto out; + return; } reg_request = list_first_entry(®_requests_list, @@ -1595,10 +1574,6 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); reg_process_hint(reg_request, reg_request->initiator); - -out: - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -1607,9 +1582,6 @@ static void reg_process_pending_beacon_hints(void) struct cfg80211_registered_device *rdev; struct reg_beacon *pending_beacon, *tmp; - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); - /* This goes through the _pending_ beacon list */ spin_lock_bh(®_pending_beacons_lock); @@ -1626,14 +1598,14 @@ static void reg_process_pending_beacon_hints(void) } spin_unlock_bh(®_pending_beacons_lock); - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } static void reg_todo(struct work_struct *work) { + rtnl_lock(); reg_process_pending_hints(); reg_process_pending_beacon_hints(); + rtnl_unlock(); } static void queue_regulatory_request(struct regulatory_request *request) @@ -1717,29 +1689,23 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) } EXPORT_SYMBOL(regulatory_hint); -/* - * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and - * therefore cannot iterate over the rdev list here. - */ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, const u8 *country_ie, u8 country_ie_len) { char alpha2[2]; enum environment_cap env = ENVIRON_ANY; - struct regulatory_request *request, *lr; - - mutex_lock(®_mutex); - lr = get_last_request(); - - if (unlikely(!lr)) - goto out; + struct regulatory_request *request = NULL, *lr; /* IE len must be evenly divisible by 2 */ if (country_ie_len & 0x01) - goto out; + return; if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) - goto out; + return; + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (!request) + return; alpha2[0] = country_ie[0]; alpha2[1] = country_ie[1]; @@ -1749,19 +1715,21 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, else if (country_ie[2] == 'O') env = ENVIRON_OUTDOOR; + rcu_read_lock(); + lr = get_last_request(); + + if (unlikely(!lr)) + goto out; + /* * We will run this only upon a successful connection on cfg80211. * We leave conflict resolution to the workqueue, where can hold - * cfg80211_mutex. + * the RTNL. */ if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && lr->wiphy_idx != WIPHY_IDX_INVALID) goto out; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); - if (!request) - goto out; - request->wiphy_idx = get_wiphy_idx(wiphy); request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; @@ -1769,8 +1737,10 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, request->country_ie_env = env; queue_regulatory_request(request); + request = NULL; out: - mutex_unlock(®_mutex); + kfree(request); + rcu_read_unlock(); } static void restore_alpha2(char *alpha2, bool reset_user) @@ -1858,8 +1828,7 @@ static void restore_regulatory_settings(bool reset_user) LIST_HEAD(tmp_reg_req_list); struct cfg80211_registered_device *rdev; - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); + ASSERT_RTNL(); reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -1914,9 +1883,6 @@ static void restore_regulatory_settings(bool reset_user) list_splice_tail_init(&tmp_reg_req_list, ®_requests_list); spin_unlock(®_requests_lock); - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); - REG_DBG_PRINT("Kicking the queue\n"); schedule_work(®_work); @@ -2231,7 +2197,6 @@ int set_regdom(const struct ieee80211_regdomain *rd) struct regulatory_request *lr; int r; - mutex_lock(®_mutex); lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ @@ -2241,14 +2206,12 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); kfree(rd); - goto out; + return r; } /* This would make this whole thing pointless */ - if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { - r = -EINVAL; - goto out; - } + if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) + return -EINVAL; /* update all wiphys now with the new established regulatory domain */ update_all_wiphy_regulatory(lr->initiator); @@ -2259,10 +2222,7 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); - out: - mutex_unlock(®_mutex); - - return r; + return 0; } int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) @@ -2287,23 +2247,17 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) void wiphy_regulatory_register(struct wiphy *wiphy) { - mutex_lock(®_mutex); - if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); - - mutex_unlock(®_mutex); } -/* Caller must hold cfg80211_mutex */ void wiphy_regulatory_deregister(struct wiphy *wiphy) { struct wiphy *request_wiphy = NULL; struct regulatory_request *lr; - mutex_lock(®_mutex); lr = get_last_request(); if (!reg_dev_ignore_cell_hint(wiphy)) @@ -2316,12 +2270,10 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy || request_wiphy != wiphy) - goto out; + return; lr->wiphy_idx = WIPHY_IDX_INVALID; lr->country_ie_env = ENVIRON_ANY; -out: - mutex_unlock(®_mutex); } static void reg_timeout_work(struct work_struct *work) @@ -2385,9 +2337,9 @@ void regulatory_exit(void) cancel_delayed_work_sync(®_timeout); /* Lock to suppress warnings */ - mutex_lock(®_mutex); + rtnl_lock(); reset_regdomains(true, NULL); - mutex_unlock(®_mutex); + rtnl_unlock(); dev_set_uevent_suppress(®_pdev->dev, true); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fd99ea495b7e..dd01b58fa78c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); request = rdev->scan_req; @@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); ___cfg80211_scan_done(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -241,6 +241,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); request->aborted = aborted; + request->notified = true; queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -255,7 +256,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) request = rdev->sched_scan_req; - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { @@ -270,7 +271,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) nl80211_send_sched_scan_results(rdev, request->dev); } - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } void cfg80211_sched_scan_results(struct wiphy *wiphy) @@ -289,9 +290,9 @@ void cfg80211_sched_scan_stopped(struct wiphy *wiphy) trace_cfg80211_sched_scan_stopped(wiphy); - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -300,7 +301,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, { struct net_device *dev; - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); if (!rdev->sched_scan_req) return -ENOENT; @@ -1040,6 +1041,25 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) EXPORT_SYMBOL(cfg80211_unlink_bss); #ifdef CONFIG_CFG80211_WEXT +static struct cfg80211_registered_device * +cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + + ASSERT_RTNL(); + + dev = dev_get_by_index(net, ifindex); + if (!dev) + return ERR_PTR(-ENODEV); + if (dev->ieee80211_ptr) + rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + else + rdev = ERR_PTR(-ENODEV); + dev_put(dev); + return rdev; +} + int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) @@ -1062,7 +1082,6 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1169,9 +1188,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: - mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); - cfg80211_unlock_rdev(rdev); return err; } EXPORT_SYMBOL_GPL(cfg80211_wext_siwscan); @@ -1470,10 +1487,8 @@ int cfg80211_wext_giwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - if (rdev->scan_req) { - res = -EAGAIN; - goto out; - } + if (rdev->scan_req) + return -EAGAIN; res = ieee80211_scan_results(rdev, info, extra, data->length); data->length = 0; @@ -1482,8 +1497,6 @@ int cfg80211_wext_giwscan(struct net_device *dev, res = 0; } - out: - cfg80211_unlock_rdev(rdev); return res; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwscan); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 3ed35c345cae..81be95f3be74 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -43,35 +43,29 @@ static bool cfg80211_is_all_idle(void) struct wireless_dev *wdev; bool is_all_idle = true; - mutex_lock(&cfg80211_mutex); - /* * All devices must be idle as otherwise if you are actively * scanning some new beacon hints could be learned and would * count as new regulatory hints. */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - cfg80211_lock_rdev(rdev); list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) is_all_idle = false; wdev_unlock(wdev); } - cfg80211_unlock_rdev(rdev); } - mutex_unlock(&cfg80211_mutex); - return is_all_idle; } static void disconnect_work(struct work_struct *work) { - if (!cfg80211_is_all_idle()) - return; - - regulatory_hint_disconnect(); + rtnl_lock(); + if (cfg80211_is_all_idle()) + regulatory_hint_disconnect(); + rtnl_unlock(); } static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); @@ -85,7 +79,6 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); - lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -176,13 +169,13 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_AUTHENTICATE_NEXT: BUG_ON(!rdev->ops->auth); wdev->conn->state = CFG80211_CONN_AUTHENTICATING; - return __cfg80211_mlme_auth(rdev, wdev->netdev, - params->channel, params->auth_type, - params->bssid, - params->ssid, params->ssid_len, - NULL, 0, - params->key, params->key_len, - params->key_idx, NULL, 0); + return cfg80211_mlme_auth(rdev, wdev->netdev, + params->channel, params->auth_type, + params->bssid, + params->ssid, params->ssid_len, + NULL, 0, + params->key, params->key_len, + params->key_idx, NULL, 0); case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -198,19 +191,19 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) req.vht_capa = params->vht_capa; req.vht_capa_mask = params->vht_capa_mask; - err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, - params->bssid, params->ssid, - params->ssid_len, &req); + err = cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, + params->bssid, params->ssid, + params->ssid_len, &req); if (err) - __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, - NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, - false); + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, + false); return err; case CFG80211_CONN_DEAUTH_ASSOC_FAIL: - __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, - NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); /* return an error so that we call __cfg80211_connect_result() */ return -EINVAL; default: @@ -226,9 +219,6 @@ void cfg80211_conn_work(struct work_struct *work) u8 bssid_buf[ETH_ALEN], *bssid = NULL; rtnl_lock(); - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->netdev) @@ -256,9 +246,6 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); } - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -773,11 +760,11 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, } EXPORT_SYMBOL(cfg80211_disconnected); -int __cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys, - const u8 *prev_bssid) +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss = NULL; @@ -924,27 +911,8 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, } } -int cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys) -{ - int err; - - mutex_lock(&rdev->devlist_mtx); - /* might request scan - scan_mtx -> wdev_mtx dependency */ - mutex_lock(&rdev->sched_scan_mtx); - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); - wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - - return err; -} - -int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, bool wextev) +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -979,7 +947,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, } /* wdev->conn->params.bssid must be set if > SCANNING */ - err = __cfg80211_mlme_deauth(rdev, dev, + err = cfg80211_mlme_deauth(rdev, dev, wdev->conn->params.bssid, NULL, 0, reason, false); if (err) @@ -1001,19 +969,6 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, return 0; } -int cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - u16 reason, bool wextev) -{ - int err; - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_disconnect(rdev, dev, reason, wextev); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - void cfg80211_sme_disassoc(struct net_device *dev, struct cfg80211_internal_bss *bss) { @@ -1036,6 +991,6 @@ void cfg80211_sme_disassoc(struct net_device *dev, memcpy(bssid, bss->pub.bssid, ETH_ALEN); - __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); } diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 8f28b9f798d8..360a42c6f694 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -91,6 +91,7 @@ static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) cfg80211_leave(rdev, wdev); } +#ifdef CONFIG_PM static int wiphy_suspend(struct device *dev, pm_message_t state) { struct cfg80211_registered_device *rdev = dev_to_rdev(dev); @@ -100,10 +101,10 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) rtnl_lock(); if (rdev->wiphy.registered) { - if (!rdev->wowlan) + if (!rdev->wiphy.wowlan_config) cfg80211_leave_all(rdev); if (rdev->ops->suspend) - ret = rdev_suspend(rdev, rdev->wowlan); + ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); @@ -132,6 +133,7 @@ static int wiphy_resume(struct device *dev) return ret; } +#endif static const void *wiphy_namespace(struct device *d) { @@ -146,8 +148,10 @@ struct class ieee80211_class = { .dev_release = wiphy_dev_release, .dev_attrs = ieee80211_dev_attrs, .dev_uevent = wiphy_uevent, +#ifdef CONFIG_PM .suspend = wiphy_suspend, .resume = wiphy_resume, +#endif .ns_type = &net_ns_type_operations, .namespace = wiphy_namespace, }; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5755bc14abbd..23fafeae8a10 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1911,12 +1911,12 @@ TRACE_EVENT(cfg80211_send_rx_assoc, NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG) ); -DEFINE_EVENT(netdev_evt_only, __cfg80211_send_deauth, +DEFINE_EVENT(netdev_evt_only, cfg80211_send_deauth, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev) ); -DEFINE_EVENT(netdev_evt_only, __cfg80211_send_disassoc, +DEFINE_EVENT(netdev_evt_only, cfg80211_send_disassoc, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev) ); diff --git a/net/wireless/util.c b/net/wireless/util.c index f5ad4d94ba88..74458b7f61eb 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -33,6 +33,29 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_get_response_rate); +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband) +{ + struct ieee80211_rate *bitrates; + u32 mandatory_rates = 0; + enum ieee80211_rate_flags mandatory_flag; + int i; + + if (WARN_ON(!sband)) + return 1; + + if (sband->band == IEEE80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else + mandatory_flag = IEEE80211_RATE_MANDATORY_A; + + bitrates = sband->bitrates; + for (i = 0; i < sband->n_bitrates; i++) + if (bitrates[i].flags & mandatory_flag) + mandatory_rates |= BIT(i); + return mandatory_rates; +} +EXPORT_SYMBOL(ieee80211_mandatory_rates); + int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { /* see 802.11 17.3.8.3.2 and Annex J @@ -785,12 +808,8 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) cfg80211_process_wdev_events(wdev); - - mutex_unlock(&rdev->devlist_mtx); } int cfg80211_change_iface(struct cfg80211_registered_device *rdev, @@ -822,10 +841,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype && netif_running(dev)) { - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, ntype); - mutex_unlock(&rdev->devlist_mtx); if (err) return err; @@ -841,8 +858,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: + wdev_lock(dev->ieee80211_ptr); cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); + wdev_unlock(dev->ieee80211_ptr); break; case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? */ @@ -1169,6 +1188,9 @@ bool ieee80211_operating_class_to_band(u8 operating_class, case 84: *band = IEEE80211_BAND_2GHZ; return true; + case 180: + *band = IEEE80211_BAND_60GHZ; + return true; } return false; @@ -1184,8 +1206,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, if (!beacon_int) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->beacon_interval) continue; @@ -1195,8 +1215,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, } } - mutex_unlock(&rdev->devlist_mtx); - return res; } @@ -1220,7 +1238,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, int i, j; ASSERT_RTNL(); - lockdep_assert_held(&rdev->devlist_mtx); if (WARN_ON(hweight32(radar_detect) > 1)) return -EINVAL; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index d997d0f0c54a..e7c6e862580d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -72,7 +72,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; - int ret; rdev = wiphy_to_dev(wdev->wiphy); @@ -98,11 +97,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, memset(&vifparams, 0, sizeof(vifparams)); - cfg80211_lock_rdev(rdev); - ret = cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); - cfg80211_unlock_rdev(rdev); - - return ret; + return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); } EXPORT_SYMBOL_GPL(cfg80211_wext_siwmode); @@ -579,13 +574,10 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, { int err; - /* devlist mutex needed for possible IBSS re-join */ - mutex_lock(&rdev->devlist_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_set_encryption(rdev, dev, pairwise, addr, remove, tx_key, idx, params); wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -787,7 +779,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; - int freq, err; + int freq; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -804,10 +796,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - err = cfg80211_set_monitor_channel(rdev, &chandef); - mutex_unlock(&rdev->devlist_mtx); - return err; + return cfg80211_set_monitor_channel(rdev, &chandef); case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); if (freq < 0) @@ -818,10 +807,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - err = cfg80211_set_mesh_channel(rdev, wdev, &chandef); - mutex_unlock(&rdev->devlist_mtx); - return err; + return cfg80211_set_mesh_channel(rdev, wdev, &chandef); default: return -EOPNOTSUPP; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index e79cb5c0655a..a53f8404f451 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -54,8 +54,8 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (wdev->wext.prev_bssid_valid) prev_bssid = wdev->wext.prev_bssid; - err = __cfg80211_connect(rdev, wdev->netdev, - &wdev->wext.connect, ck, prev_bssid); + err = cfg80211_connect(rdev, wdev->netdev, + &wdev->wext.connect, ck, prev_bssid); if (err) kfree(ck); @@ -87,9 +87,6 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, return -EINVAL; } - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -103,8 +100,8 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, /* if SSID set, we'll try right again, avoid event */ if (wdev->wext.connect.ssid_len) event = false; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, event); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, event); if (err) goto out; } @@ -136,9 +133,6 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -190,9 +184,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; @@ -208,8 +199,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, /* if SSID set now, we'll try to connect, avoid event */ if (len) event = false; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, event); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, event); if (err) goto out; } @@ -226,9 +217,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -287,9 +275,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -303,8 +288,8 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, ether_addr_equal(bssid, wdev->wext.connect.bssid)) goto out; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) goto out; } @@ -318,9 +303,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -383,8 +365,8 @@ int cfg80211_wext_siwgenie(struct net_device *dev, wdev->wext.ie_len = ie_len; if (wdev->sme_state != CFG80211_SME_IDLE) { - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) goto out; } @@ -420,8 +402,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: - err = __cfg80211_disconnect(rdev, dev, mlme->reason_code, - true); + err = cfg80211_disconnect(rdev, dev, mlme->reason_code, true); break; default: err = -EOPNOTSUPP; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 37ca9694aabe..1d964e23853f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -224,7 +224,7 @@ static void x25_kill_by_device(struct net_device *dev) static int x25_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct x25_neigh *nb; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 0cf003dfa8fc..eb4a84288648 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -89,7 +89,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) err = x->type->output(x, skb); if (err == -EINPROGRESS) - goto out_exit; + goto out; resume: if (err) { @@ -107,15 +107,14 @@ resume: x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); - err = 0; + return 0; -out_exit: - return err; error: spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); - goto out_exit; +out: + return err; } int xfrm_output_resume(struct sk_buff *skb, int err) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ea970b8002a2..e52cab3591dd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2785,7 +2785,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net) static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_DOWN: |