diff options
Diffstat (limited to 'net')
165 files changed, 2568 insertions, 1359 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c index e085bcc754f6..1eb05d80b07b 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -871,10 +871,10 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) */ del_timer_sync(&app->join_timer); - spin_lock(&app->lock); + spin_lock_bh(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); mrp_pdu_queue(app); - spin_unlock(&app->lock); + spin_unlock_bh(&app->lock); mrp_queue_xmit(app); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9424f3718ea7..2fb2d88e8c2e 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -341,7 +341,7 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event) static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vlan_group *grp; struct vlan_info *vlan_info; int i, flgs; diff --git a/net/Kconfig b/net/Kconfig index 08de901415ee..523e43e6da1b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -218,6 +218,7 @@ source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" source "net/netlink/Kconfig" +source "net/mpls/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index 091e7b04f301..9492e8cb64e9 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ +obj-$(CONFIG_NET_MPLS_GSO) += mpls/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 173a2e82f486..690356fa52b9 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -332,7 +332,7 @@ static void aarp_expire_timeout(unsigned long unused) static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ct; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ef12839a7cfe..7fee50d637f9 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -644,7 +644,7 @@ static inline void atalk_dev_down(struct net_device *dev) static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/atm/clip.c b/net/atm/clip.c index 8ae3a7879335..8215f7cb170b 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -539,9 +539,9 @@ static int clip_create(int number) } static int clip_device_event(struct notifier_block *this, unsigned long event, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -575,6 +575,7 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, void *ifa) { struct in_device *in_dev; + struct netdev_notifier_info info; in_dev = ((struct in_ifaddr *)ifa)->ifa_dev; /* @@ -583,7 +584,8 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, */ if (event != NETDEV_UP) return NOTIFY_DONE; - return clip_device_event(this, NETDEV_CHANGE, in_dev->dev); + netdev_notifier_info_init(&info, in_dev->dev); + return clip_device_event(this, NETDEV_CHANGE, &info); } static struct notifier_block clip_dev_notifier = { diff --git a/net/atm/mpc.c b/net/atm/mpc.c index d4cc1be5c364..3af12755cd04 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -998,14 +998,12 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) } static int mpoa_event_listener(struct notifier_block *mpoa_notifier, - unsigned long event, void *dev_ptr) + unsigned long event, void *ptr) { - struct net_device *dev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpoa_client *mpc; struct lec_priv *priv; - dev = dev_ptr; - if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e277e38f736b..4b4d2b779ec1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -111,9 +111,9 @@ again: * Handle device status changes. */ static int ax25_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -1974,7 +1974,7 @@ static struct packet_type ax25_packet_type __read_mostly = { }; static struct notifier_block ax25_dev_notifier = { - .notifier_call =ax25_device_event, + .notifier_call = ax25_device_event, }; static int __init ax25_init(void) diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index acbac2a9c62f..489bb36f1b94 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -32,7 +32,6 @@ batman-adv-y += icmp_socket.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o -batman-adv-y += ring_buffer.o batman-adv-y += routing.o batman-adv-y += send.o batman-adv-y += soft-interface.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 071f288b77a8..d07323b3e9b8 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -19,7 +19,6 @@ #include "main.h" #include "translation-table.h" -#include "ring_buffer.h" #include "originator.h" #include "routing.h" #include "gateway_common.h" @@ -29,16 +28,57 @@ #include "bat_algo.h" #include "network-coding.h" +/** + * batadv_ring_buffer_set - update the ring buffer with the given value + * @lq_recv: pointer to the ring buffer + * @lq_index: index to store the value at + * @value: value to store in the ring buffer + */ +static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, + uint8_t value) +{ + lq_recv[*lq_index] = value; + *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; +} + +/** + * batadv_ring_buffer_set - compute the average of all non-zero values stored + * in the given ring buffer + * @lq_recv: pointer to the ring buffer + * + * Returns computed average value. + */ +static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) +{ + const uint8_t *ptr; + uint16_t count = 0, i = 0, sum = 0; + + ptr = lq_recv; + + while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { + if (*ptr != 0) { + count++; + sum += *ptr; + } + + i++; + ptr++; + } + + if (count == 0) + return 0; + + return (uint8_t)(sum / count); +} static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh, __be32 seqno) + struct batadv_orig_node *orig_neigh) { struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, - ntohl(seqno)); + neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr); if (!neigh_node) goto out; @@ -413,18 +453,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, else skb_size = packet_len; - skb_size += ETH_HLEN + NET_IP_ALIGN; + skb_size += ETH_HLEN; - forw_packet_aggr->skb = dev_alloc_skb(skb_size); + forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!forw_packet_aggr->skb) { if (!own_packet) atomic_inc(&bat_priv->batman_queue_left); kfree(forw_packet_aggr); goto out; } - skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN); - - INIT_HLIST_NODE(&forw_packet_aggr->list); + skb_reserve(forw_packet_aggr->skb, ETH_HLEN); skb_buff = skb_put(forw_packet_aggr->skb, packet_len); forw_packet_aggr->packet_len = packet_len; @@ -590,6 +628,41 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, if_incoming, 0, batadv_iv_ogm_fwd_send_time()); } +/** + * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for + * the given interface + * @hard_iface: the interface for which the windows have to be shifted + */ +static void +batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + unsigned long *word; + uint32_t i; + size_t word_index; + uint8_t *w; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + spin_lock_bh(&orig_node->ogm_cnt_lock); + word_index = hard_iface->if_num * BATADV_NUM_WORDS; + word = &(orig_node->bcast_own[word_index]); + + batadv_bit_get_packet(bat_priv, word, 1, 0); + w = &orig_node->bcast_own_sum[hard_iface->if_num]; + *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); + spin_unlock_bh(&orig_node->ogm_cnt_lock); + } + rcu_read_unlock(); + } +} + static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -634,7 +707,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS; } - batadv_slide_own_bcast_window(hard_iface); + batadv_iv_ogm_slide_own_bcast_window(hard_iface); batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, batadv_iv_ogm_emit_send_time(bat_priv)); @@ -670,7 +743,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && tmp_neigh_node->if_incoming == if_incoming && atomic_inc_not_zero(&tmp_neigh_node->refcount)) { - if (neigh_node) + if (WARN(neigh_node, "too many matching neigh_nodes")) batadv_neigh_node_free_ref(neigh_node); neigh_node = tmp_neigh_node; continue; @@ -696,8 +769,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, ethhdr->h_source, - orig_node, orig_tmp, - batadv_ogm_packet->seqno); + orig_node, orig_tmp); batadv_orig_node_free_ref(orig_tmp); if (!neigh_node) @@ -829,8 +901,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, orig_neigh_node->orig, orig_neigh_node, - orig_neigh_node, - batadv_ogm_packet->seqno); + orig_neigh_node); if (!neigh_node) goto out; @@ -991,7 +1062,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, struct batadv_neigh_node *orig_neigh_router = NULL; int has_directlink_flag; int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_broadcast = 0, is_bidirect; + int is_bidirect; bool is_single_hop_neigh = false; bool is_from_best_next_hop = false; int is_duplicate, sameseq, simlar_ttl; @@ -1054,19 +1125,9 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (batadv_compare_eth(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; - - if (is_broadcast_ether_addr(ethhdr->h_source)) - is_broadcast = 1; } rcu_read_unlock(); - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); - return; - } - if (is_my_addr) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: received my own broadcast (sender: %pM)\n", @@ -1074,13 +1135,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, return; } - if (is_broadcast) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all packets with broadcast source addr (sender: %pM)\n", - ethhdr->h_source); - return; - } - if (is_my_orig) { unsigned long *word; int offset; @@ -1288,7 +1342,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, skb->len + ETH_HLEN); packet_len = skb_headlen(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); packet_buff = skb->data; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 379061c72549..e9d8e0b3c3d0 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -180,7 +180,7 @@ static struct batadv_bla_claim */ static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, - uint8_t *addr, short vid) + uint8_t *addr, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; @@ -257,7 +257,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, - short vid, int claimtype) + unsigned short vid, int claimtype) { struct sk_buff *skb; struct ethhdr *ethhdr; @@ -307,7 +307,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, */ memcpy(ethhdr->h_source, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid); + "bla_send_claim(): CLAIM %pM on vid %d\n", mac, + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame @@ -316,7 +317,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, - vid); + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: /* announcement frame @@ -325,7 +326,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", - ethhdr->h_source, vid); + ethhdr->h_source, BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame @@ -335,13 +336,15 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); memcpy(ethhdr->h_dest, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n", - ethhdr->h_source, ethhdr->h_dest, vid); + "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", + ethhdr->h_source, ethhdr->h_dest, + BATADV_PRINT_VID(vid)); break; } - if (vid != -1) - skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid); + if (vid & BATADV_VLAN_HAS_TAG) + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); @@ -367,7 +370,7 @@ out: */ static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, - short vid, bool own_backbone) + unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; struct batadv_orig_node *orig_node; @@ -380,7 +383,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", - orig, vid); + orig, BATADV_PRINT_VID(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -434,7 +437,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, static void batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -456,7 +459,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, */ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct hlist_head *head; struct batadv_hashtable *hash; @@ -547,7 +550,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, * @backbone_gw: the backbone gateway which claims it */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid, + const uint8_t *mac, const unsigned short vid, struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_bla_claim *claim; @@ -572,7 +575,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, atomic_set(&claim->refcount, 2); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, @@ -591,7 +594,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); @@ -611,7 +614,7 @@ claim_free_ref: * given mac address and vid. */ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid) + const uint8_t *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; @@ -622,7 +625,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, return; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); @@ -637,7 +640,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, /* check for ANNOUNCE frame, return 1 if handled */ static int batadv_handle_announce(struct batadv_priv *bat_priv, uint8_t *an_addr, uint8_t *backbone_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; uint16_t crc; @@ -658,12 +661,13 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - vid, backbone_gw->orig, crc); + BATADV_PRINT_VID(vid), backbone_gw->orig, crc); if (backbone_gw->crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", - backbone_gw->orig, backbone_gw->vid, + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), backbone_gw->crc, crc); batadv_bla_send_request(backbone_gw); @@ -685,7 +689,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, static int batadv_handle_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - struct ethhdr *ethhdr, short vid) + struct ethhdr *ethhdr, unsigned short vid) { /* check for REQUEST frame */ if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest)) @@ -699,7 +703,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", - vid, ethhdr->h_source); + BATADV_PRINT_VID(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return 1; @@ -709,7 +713,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, static int batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - uint8_t *claim_addr, short vid) + uint8_t *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -727,7 +731,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", - claim_addr, vid, backbone_gw->orig); + claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_free_ref(backbone_gw); @@ -738,7 +742,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, static int batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, uint8_t *claim_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -861,14 +865,15 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_bla_claim_dst *bla_dst; uint16_t proto; int headlen; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { vhdr = (struct vlan_ethhdr *)ethhdr; vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; proto = ntohs(vhdr->h_vlan_encapsulated_proto); headlen = sizeof(*vhdr); } else { @@ -885,7 +890,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, return 0; /* pskb_may_pull() may have modified the pointers, get ethhdr again */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen); /* Check whether the ARP frame carries a valid @@ -910,7 +915,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, + hw_dst); if (ret < 2) return ret; @@ -945,7 +951,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); return 1; } @@ -1358,7 +1364,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_bla_backbone_gw *backbone_gw; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) return 0; @@ -1375,6 +1381,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; } /* see if this originator is a backbone gw for this VLAN */ @@ -1424,15 +1431,15 @@ void batadv_bla_free(struct batadv_priv *bat_priv) * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast) +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -1519,7 +1526,8 @@ out: * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; @@ -1539,7 +1547,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) if (batadv_bla_process_claim(bat_priv, primary_if, skb)) goto handled; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ @@ -1623,8 +1631,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); - seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", - claim->addr, claim->vid, + seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", + claim->addr, BATADV_PRINT_VID(claim->vid), claim->backbone_gw->orig, (is_own ? 'x' : ' '), claim->backbone_gw->crc); @@ -1676,10 +1684,10 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) if (is_own) continue; - seq_printf(seq, - " * %pM on % 5d % 4i.%03is (%#.4x)\n", - backbone_gw->orig, backbone_gw->vid, - secs, msecs, backbone_gw->crc); + seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), secs, + msecs, backbone_gw->crc); } rcu_read_unlock(); } diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index dea2fbc5d98d..4b102e71e5bd 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -21,9 +21,10 @@ #define _NET_BATMAN_ADV_BLA_H_ #ifdef CONFIG_BATMAN_ADV_BLA -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast); -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid); +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast); +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid); int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); @@ -42,13 +43,14 @@ void batadv_bla_free(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int batadv_bla_rx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid, bool is_bcast) + struct sk_buff *skb, unsigned short vid, + bool is_bcast) { return 0; } static inline int batadv_bla_tx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid) + struct sk_buff *skb, unsigned short vid) { return 0; } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 239992021b1d..06345d401588 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -45,9 +45,9 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly + * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly * free it - * @dat_entry: the oentry to free + * @dat_entry: the entry to free */ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) { @@ -56,10 +56,10 @@ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) } /** - * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not + * batadv_dat_to_purge - check whether a dat_entry has to be purged or not * @dat_entry: the entry to check * - * Returns true if the entry has to be purged now, false otherwise + * Returns true if the entry has to be purged now, false otherwise. */ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) { @@ -75,8 +75,8 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) * returns a boolean value: true is the entry has to be deleted, * false otherwise * - * Loops over each entry in the DAT local storage and delete it if and only if - * the to_purge function passed as argument returns true + * Loops over each entry in the DAT local storage and deletes it if and only if + * the to_purge function passed as argument returns true. */ static void __batadv_dat_purge(struct batadv_priv *bat_priv, bool (*to_purge)(struct batadv_dat_entry *)) @@ -97,7 +97,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, spin_lock_bh(list_lock); hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { - /* if an helper function has been passed as parameter, + /* if a helper function has been passed as parameter, * ask it if the entry has to be purged or not */ if (to_purge && !to_purge(dat_entry)) @@ -134,7 +134,7 @@ static void batadv_dat_purge(struct work_struct *work) * @node: node in the local table * @data2: second object to compare the node to * - * Returns 1 if the two entry are the same, 0 otherwise + * Returns 1 if the two entries are the same, 0 otherwise. */ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) { @@ -149,7 +149,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_src field in the ARP packet + * Returns the value of the hw_src field in the ARP packet. */ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) { @@ -166,7 +166,7 @@ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_src field in the ARP packet + * Returns the value of the ip_src field in the ARP packet. */ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) { @@ -178,7 +178,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_dst field in the ARP packet + * Returns the value of the hw_dst field in the ARP packet. */ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) { @@ -190,7 +190,7 @@ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_dst field in the ARP packet + * Returns the value of the ip_dst field in the ARP packet. */ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) { @@ -202,7 +202,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) * @data: data to hash * @size: size of the hash table * - * Returns the selected index in the hash table for the given data + * Returns the selected index in the hash table for the given data. */ static uint32_t batadv_hash_dat(const void *data, uint32_t size) { @@ -224,12 +224,12 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size) } /** - * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash + * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash * table * @bat_priv: the bat priv with all the soft interface information * @ip: search key * - * Returns the dat_entry if found, NULL otherwise + * Returns the dat_entry if found, NULL otherwise. */ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) @@ -343,9 +343,6 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, if (hdr_size == 0) return; - /* if the ARP packet is encapsulated in a batman packet, let's print - * some debug messages - */ unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; switch (unicast_4addr_packet->u.header.packet_type) { @@ -409,7 +406,8 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, * @candidate: orig_node under evaluation * @max_orig_node: last selected candidate * - * Returns true if the node has been elected as next candidate or false othrwise + * Returns true if the node has been elected as next candidate or false + * otherwise. */ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, int select, batadv_dat_addr_t tmp_max, @@ -472,7 +470,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, */ cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND; - /* iterate over the originator list and find the node with closest + /* iterate over the originator list and find the node with the closest * dat_address which has not been selected yet */ for (i = 0; i < hash->size; i++) { @@ -480,7 +478,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - /* the dht space is a ring and addresses are unsigned */ + /* the dht space is a ring using unsigned addresses */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -512,7 +510,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, } /** - * batadv_dat_select_candidates - selects the nodes which the DHT message has to + * batadv_dat_select_candidates - select the nodes which the DHT message has to * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT @@ -521,7 +519,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * closest values (from the LEFT, with wrap around if needed) then the hash * value of the key. ip_dst is the key. * - * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM + * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) @@ -558,10 +556,11 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @ip: the DHT key * @packet_subtype: unicast4addr packet subtype to use * - * In this function the skb is copied by means of pskb_copy() and is sent as - * unicast packet to each of the selected candidates + * This function copies the skb with pskb_copy() and is sent as unicast packet + * to each of the selected candidates. * - * Returns true if the packet is sent to at least one candidate, false otherwise + * Returns true if the packet is sent to at least one candidate, false + * otherwise. */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, @@ -727,7 +726,7 @@ out: * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb * - * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise + * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. */ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -754,9 +753,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN); - /* Check whether the ARP packet carries a valid - * IP information - */ + /* check whether the ARP packet carries a valid IP information */ if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) goto out; @@ -784,7 +781,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) goto out; - /* we don't care about the destination MAC address in ARP requests */ + /* don't care about the destination MAC address in ARP requests */ if (arphdr->ar_op != htons(ARPOP_REQUEST)) { hw_dst = batadv_arp_hw_dst(skb, hdr_size); if (is_zero_ether_addr(hw_dst) || @@ -804,8 +801,8 @@ out: * @skb: packet to check * * Returns true if the message has been sent to the dht candidates, false - * otherwise. In case of true the message has to be enqueued to permit the - * fallback + * otherwise. In case of a positive return value the message has to be enqueued + * to permit the fallback. */ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -867,7 +864,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); ret = true; } else { - /* Send the request on the DHT */ + /* Send the request to the DHT */ ret = batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_GET); } @@ -884,7 +881,7 @@ out: * @skb: packet to check * @hdr_size: size of the encapsulation header * - * Returns true if the request has been answered, false otherwise + * Returns true if the request has been answered, false otherwise. */ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -924,10 +921,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; - /* to preserve backwards compatibility, here the node has to answer - * using the same packet type it received for the request. This is due - * to that if a node is not using the 4addr packet format it may not - * support it. + /* To preserve backwards compatibility, the node has choose the outgoing + * format based on the incoming request packet type. The assumption is + * that a node not using the 4addr packet format doesn't support it. */ if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, @@ -977,7 +973,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); /* Send the ARP reply to the candidates for both the IP addresses that - * the node got within the ARP reply + * the node obtained from the ARP reply */ batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); @@ -987,7 +983,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, * DAT storage only * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check - * @hdr_size: siaze of the encapsulation header + * @hdr_size: size of the encapsulation header */ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -1031,11 +1027,11 @@ out: /** * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped - * (because the node has already got the reply via DAT) or not + * (because the node has already obtained the reply via DAT) or not * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the broadcast packet * - * Returns true if the node can drop the packet, false otherwise + * Returns true if the node can drop the packet, false otherwise. */ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 522243aff2f3..c478e6bcf89b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -117,6 +117,58 @@ static int batadv_is_valid_iface(const struct net_device *net_dev) return 1; } +/** + * batadv_is_wifi_netdev - check if the given net_device struct is a wifi + * interface + * @net_device: the device to check + * + * Returns true if the net device is a 802.11 wireless device, false otherwise. + */ +static bool batadv_is_wifi_netdev(struct net_device *net_device) +{ +#ifdef CONFIG_WIRELESS_EXT + /* pre-cfg80211 drivers have to implement WEXT, so it is possible to + * check for wireless_handlers != NULL + */ + if (net_device->wireless_handlers) + return true; +#endif + + /* cfg80211 drivers have to set ieee80211_ptr */ + if (net_device->ieee80211_ptr) + return true; + + return false; +} + +/** + * batadv_is_wifi_iface - check if the given interface represented by ifindex + * is a wifi interface + * @ifindex: interface index to check + * + * Returns true if the interface represented by ifindex is a 802.11 wireless + * device, false otherwise. + */ +bool batadv_is_wifi_iface(int ifindex) +{ + struct net_device *net_device = NULL; + bool ret = false; + + if (ifindex == BATADV_NULL_IFINDEX) + goto out; + + net_device = dev_get_by_index(&init_net, ifindex); + if (!net_device) + goto out; + + ret = batadv_is_wifi_netdev(net_device); + +out: + if (net_device) + dev_put(net_device); + return ret; +} + static struct batadv_hard_iface * batadv_hardif_get_active(const struct net_device *soft_iface) { @@ -525,7 +577,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); + hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) goto release_dev; @@ -541,18 +593,16 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); + hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; + if (batadv_is_wifi_netdev(net_dev)) + hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; + /* extra reference for return */ atomic_set(&hard_iface->refcount, 2); batadv_check_known_mac_addr(hard_iface->net_dev); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); - /* This can't be called via a bat_priv callback because - * we have no bat_priv yet. - */ - atomic_set(&hard_iface->bat_iv.ogm_seqno, 1); - hard_iface->bat_iv.ogm_buff = NULL; - return hard_iface; free_if: @@ -595,7 +645,7 @@ void batadv_hardif_remove_interfaces(void) static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *net_dev = ptr; + struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; @@ -657,38 +707,6 @@ out: return NOTIFY_DONE; } -/* This function returns true if the interface represented by ifindex is a - * 802.11 wireless device - */ -bool batadv_is_wifi_iface(int ifindex) -{ - struct net_device *net_device = NULL; - bool ret = false; - - if (ifindex == BATADV_NULL_IFINDEX) - goto out; - - net_device = dev_get_by_index(&init_net, ifindex); - if (!net_device) - goto out; - -#ifdef CONFIG_WIRELESS_EXT - /* pre-cfg80211 drivers have to implement WEXT, so it is possible to - * check for wireless_handlers != NULL - */ - if (net_device->wireless_handlers) - ret = true; - else -#endif - /* cfg80211 drivers have to set ieee80211_ptr */ - if (net_device->ieee80211_ptr) - ret = true; -out: - if (net_device) - dev_put(net_device); - return ret; -} - struct notifier_block batadv_hard_if_notifier = { .notifier_call = batadv_hard_if_event, }; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 0ba6c899b2d3..b27508b8085c 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -177,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (len >= sizeof(struct batadv_icmp_packet_rr)) packet_len = sizeof(struct batadv_icmp_packet_rr); - skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN); if (!skb) { len = -ENOMEM; goto out; } - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); if (copy_from_user(icmp_packet, buff, packet_len)) { diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 1240f07ad31d..51aafd669cbb 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -181,6 +181,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_originator_free(bat_priv); free_percpu(bat_priv->bat_counters); + bat_priv->bat_counters = NULL; atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 59a0d6af15c8..5e9aebb7d56b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.2.0" +#define BATADV_SOURCE_VERSION "2013.3.0" #endif /* B.A.T.M.A.N. parameters */ @@ -76,6 +76,11 @@ #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ +/* number of packets to send for broadcasts on different interface types */ +#define BATADV_NUM_BCASTS_DEFAULT 1 +#define BATADV_NUM_BCASTS_WIRELESS 3 +#define BATADV_NUM_BCASTS_MAX 3 + /* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ #define ARP_REQ_DELAY 250 /* numbers of originator to contact for any PUT/GET DHT operation */ @@ -157,6 +162,17 @@ enum batadv_uev_type { #include <linux/seq_file.h> #include "types.h" +/** + * batadv_vlan_flags - flags for the four MSB of any vlan ID field + * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not + */ +enum batadv_vlan_flags { + BATADV_VLAN_HAS_TAG = BIT(15), +}; + +#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \ + (int)(vid & VLAN_VID_MASK) : -1) + extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index e84629ece9b7..a487d46e0aec 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1245,7 +1245,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, return; /* Set the mac header as if we actually sent the packet uncoded */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); @@ -1359,18 +1359,17 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, * buffer * @skb: data skb to forward * @neigh_node: next hop to forward packet to - * @ethhdr: pointer to the ethernet header inside the skb * * Returns true if the skb was consumed (encoded packet sent) or false otherwise */ bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { const struct net_device *netdev = neigh_node->if_incoming->soft_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; @@ -1423,7 +1422,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; @@ -1482,7 +1481,7 @@ out: void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb) { - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return; @@ -1533,7 +1532,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, skb_reset_network_header(skb); /* Reconstruct original mac header */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr, ðhdr_tmp, sizeof(*ethhdr)); /* Select the correct unicast header information based on the location @@ -1677,7 +1676,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, return NET_RX_DROP; coded_packet = (struct batadv_coded_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* Verify frame is destined for us */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && @@ -1763,6 +1762,13 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) /* For each orig_node in this bin */ rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + /* no need to print the orig node if it does not have + * network coding neighbors + */ + if (list_empty(&orig_node->in_coding_list) && + list_empty(&orig_node->out_coding_list)) + continue; + seq_printf(seq, "Node: %pM\n", orig_node->orig); seq_puts(seq, " Ingoing: "); diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 4fa6d0caddbd..85a4ec81ad50 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -36,8 +36,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); void batadv_nc_init_orig(struct batadv_orig_node *orig_node); bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr); + struct batadv_neigh_node *neigh_node); void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb); void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, @@ -87,8 +86,7 @@ static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } static inline bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { return false; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 2f3452546636..f50553a7de62 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -92,7 +92,7 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node) struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno) + const uint8_t *neigh_addr) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_neigh_node *neigh_node; @@ -110,8 +110,8 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, atomic_set(&neigh_node->refcount, 2); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM, initial seqno %d\n", - neigh_addr, seqno); + "Creating new neighbor %pM on interface %s\n", neigh_addr, + hard_iface->net_dev->name); out: return neigh_node; @@ -156,12 +156,28 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) kfree(orig_node); } +/** + * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly + * schedule an rcu callback for freeing it + * @orig_node: the orig node to free + */ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) { if (atomic_dec_and_test(&orig_node->refcount)) call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); } +/** + * batadv_orig_node_free_ref_now - decrement the orig node refcounter and + * possibly free it (without rcu callback) + * @orig_node: the orig node to free + */ +void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node) +{ + if (atomic_dec_and_test(&orig_node->refcount)) + batadv_orig_node_free_rcu(&orig_node->rcu); +} + void batadv_originator_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 7df48fa7669d..7887b84a9af4 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -26,11 +26,12 @@ int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct batadv_priv *bat_priv); void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); +void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno); + const uint8_t *neigh_addr); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * batadv_orig_node_get_router(struct batadv_orig_node *orig_node); diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c deleted file mode 100644 index ccab0bbdbb59..000000000000 --- a/net/batman-adv/ring_buffer.c +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#include "main.h" -#include "ring_buffer.h" - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value) -{ - lq_recv[*lq_index] = value; - *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; -} - -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) -{ - const uint8_t *ptr; - uint16_t count = 0, i = 0, sum = 0; - - ptr = lq_recv; - - while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { - if (*ptr != 0) { - count++; - sum += *ptr; - } - - i++; - ptr++; - } - - if (count == 0) - return 0; - - return (uint8_t)(sum / count); -} diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h deleted file mode 100644 index 3f92ae248e83..000000000000 --- a/net/batman-adv/ring_buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_ -#define _NET_BATMAN_ADV_RING_BUFFER_H_ - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value); -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]); - -#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b27a4d792d15..2f0bd3ffe6e8 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -34,35 +34,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) -{ - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - unsigned long *word; - uint32_t i; - size_t word_index; - uint8_t *w; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - spin_lock_bh(&orig_node->ogm_cnt_lock); - word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bcast_own[word_index]); - - batadv_bit_get_packet(bat_priv, word, 1, 0); - w = &orig_node->bcast_own_sum[hard_iface->if_num]; - *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_node->ogm_cnt_lock); - } - rcu_read_unlock(); - } -} - static void _batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) @@ -256,7 +227,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, header_len))) return false; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -314,7 +285,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_ECHO_REPLY; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -362,7 +333,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_TTL_EXCEEDED; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -392,7 +363,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -439,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmp_packet->header.ttl--; /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) + if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -569,7 +540,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, if (unlikely(!pskb_may_pull(skb, hdr_size))) return -ENODATA; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -803,8 +774,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL; struct batadv_neigh_node *neigh_node = NULL; struct batadv_unicast_packet *unicast_packet; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); - int ret = NET_RX_DROP; + struct ethhdr *ethhdr = eth_hdr(skb); + int res, ret = NET_RX_DROP; struct sk_buff *new_skb; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -864,16 +835,19 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet->header.ttl--; - /* network code packet if possible */ - if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { - ret = NET_RX_SUCCESS; - } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { - ret = NET_RX_SUCCESS; + res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - /* Update stats counter */ + /* translate transmit result into receive result */ + if (res == NET_XMIT_SUCCESS) { + /* skb was transmitted and consumed */ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, skb->len + ETH_HLEN); + + ret = NET_RX_SUCCESS; + } else if (res == NET_XMIT_POLICED) { + /* skb was buffered and consumed */ + ret = NET_RX_SUCCESS; } out: @@ -1165,7 +1139,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -1265,7 +1239,7 @@ int batadv_recv_vis_packet(struct sk_buff *skb, return NET_RX_DROP; vis_packet = (struct batadv_vis_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* not for me */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 99eeafaba407..72a29bde2010 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -20,7 +20,6 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ #define _NET_BATMAN_ADV_ROUTING_H_ -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface); bool batadv_check_management_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, int header_len); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 263cfd1ccee7..e9ff8d801201 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -61,7 +61,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); @@ -96,26 +96,37 @@ send_skb_err: * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Returns TRUE on success; FALSE otherwise. + * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or + * NET_XMIT_POLICED if the skb is buffered for later transmit. */ -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if) +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; + int ret = NET_XMIT_DROP; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); if (!neigh_node) - return false; + return ret; - /* route it */ - batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + /* try to network code the packet, if it is received on an interface + * (i.e. being forwarded). If the packet originates from this node or if + * network coding fails, then send the packet as usual. + */ + if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { + ret = NET_XMIT_POLICED; + } else { + batadv_send_skb_packet(skb, neigh_node->if_incoming, + neigh_node->addr); + ret = NET_XMIT_SUCCESS; + } batadv_neigh_node_free_ref(neigh_node); - return true; + return ret; } void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) @@ -152,8 +163,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time) { - INIT_HLIST_NODE(&forw_packet->list); - /* add new packet to packet list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list); @@ -260,6 +269,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (hard_iface->soft_iface != soft_iface) continue; + if (forw_packet->num_packets >= hard_iface->num_bcasts) + continue; + /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb1) @@ -271,7 +283,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) forw_packet->num_packets++; /* if we still have some more bcasts to send */ - if (forw_packet->num_packets < 3) { + if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, msecs_to_jiffies(5)); return; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 38e662f619ac..e7b17880fca4 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -23,9 +23,9 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const uint8_t *dst_addr); -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if); +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if); void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6f20d339e33a..700d0b49742d 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -154,7 +154,7 @@ static int batadv_interface_tx(struct sk_buff *skb, 0x00, 0x00}; unsigned int header_len = 0; int data_len = skb->len, ret; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; bool do_bcast = false; uint32_t seqno; unsigned long brd_delay = 1; @@ -303,7 +303,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_header *batadv_header = (struct batadv_header *)skb->data; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; __be16 ethertype = __constant_htons(ETH_P_BATMAN); bool is_bcast; @@ -316,7 +316,7 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: @@ -505,6 +505,7 @@ unreg_debugfs: batadv_debugfs_del_meshif(dev); free_bat_counters: free_percpu(bat_priv->bat_counters); + bat_priv->bat_counters = NULL; return ret; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5e89deeb9542..429aeef3d8b2 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -144,7 +144,12 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) struct batadv_tt_orig_list_entry *orig_entry; orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); - batadv_orig_node_free_ref(orig_entry->orig_node); + + /* We are in an rcu callback here, therefore we cannot use + * batadv_orig_node_free_ref() and its call_rcu(): + * An rcu_barrier() wouldn't wait for that to finish + */ + batadv_orig_node_free_ref_now(orig_entry->orig_node); kfree(orig_entry); } @@ -158,10 +163,19 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } +/** + * batadv_tt_local_event - store a local TT event (ADD/DEL) + * @bat_priv: the bat priv with all the soft interface information + * @tt_local_entry: the TT entry involved in the event + * @event_flags: flags to store in the event structure + */ static void batadv_tt_local_event(struct batadv_priv *bat_priv, - const uint8_t *addr, uint8_t flags) + struct batadv_tt_local_entry *tt_local_entry, + uint8_t event_flags) { struct batadv_tt_change_node *tt_change_node, *entry, *safe; + struct batadv_tt_common_entry *common = &tt_local_entry->common; + uint8_t flags = common->flags | event_flags; bool event_removed = false; bool del_op_requested, del_op_entry; @@ -171,7 +185,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - memcpy(tt_change_node->change.addr, addr, ETH_ALEN); + memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); del_op_requested = flags & BATADV_TT_CLIENT_DEL; @@ -179,7 +193,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tt.changes_list_lock); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { - if (!batadv_compare_eth(entry->change.addr, addr)) + if (!batadv_compare_eth(entry->change.addr, common->addr)) continue; /* DEL+ADD in the same orig interval have no effect and can be @@ -327,7 +341,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, } add_event: - batadv_tt_local_event(bat_priv, addr, tt_local->common.flags); + batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS); check_roaming: /* Check whether it is a roaming, but don't do anything if the roaming @@ -524,8 +538,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, uint16_t flags, const char *message) { - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - tt_local_entry->common.flags | flags); + batadv_tt_local_event(bat_priv, tt_local_entry, flags); /* The local client has to be marked as "pending to be removed" but has * to be kept in the table in order to send it in a full table @@ -579,8 +592,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, /* if this client has been added right now, it is possible to * immediately purge it */ - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - curr_flags | BATADV_TT_CLIENT_DEL); + batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); hlist_del_rcu(&tt_local_entry->common.hash_entry); batadv_tt_local_entry_free_ref(tt_local_entry); @@ -786,10 +798,25 @@ out: batadv_tt_orig_list_entry_free_ref(orig_entry); } -/* caller must hold orig_node refcount */ +/** + * batadv_tt_global_add - add a new TT global entry or update an existing one + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the originator announcing the client + * @tt_addr: the mac address of the non-mesh client + * @flags: TT flags that have to be set for this non-mesh client + * @ttvn: the tt version number ever announcing this non-mesh client + * + * Add a new TT global entry for the given originator. If the entry already + * exists add a new reference to the given originator (a global entry can have + * references to multiple originators) and adjust the flags attribute to reflect + * the function argument. + * If a TT local entry exists for this non-mesh client remove it. + * + * The caller must hold orig_node refcount. + */ int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *tt_addr, uint8_t flags, + const unsigned char *tt_addr, uint16_t flags, uint8_t ttvn) { struct batadv_tt_global_entry *tt_global_entry; @@ -1595,11 +1622,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = tt_query_size + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); tt_response->ttvn = ttvn; @@ -1660,11 +1687,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_req_len = sizeof(*tt_request); tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); @@ -1686,7 +1713,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); - if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: @@ -1710,7 +1737,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; uint8_t orig_ttvn, req_ttvn, ttvn; - int ret = false; + int res, ret = false; unsigned char *tt_buff; bool full_table; uint16_t tt_len, tt_tot; @@ -1757,11 +1784,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1805,8 +1832,10 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL); + if (res != NET_XMIT_DROP) ret = true; + goto out; unlock: @@ -1873,11 +1902,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1920,7 +1949,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = true; goto out; @@ -2207,11 +2236,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; - skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); @@ -2233,7 +2262,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index ab8e683b402f..659a3bb759ce 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -33,7 +33,7 @@ void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, const unsigned char *tt_buff, int tt_buff_len); int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *addr, uint8_t flags, + const unsigned char *addr, uint16_t flags, uint8_t ttvn); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index aba8364c3689..b2c94e139319 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -61,6 +61,7 @@ struct batadv_hard_iface_bat_iv { * @if_status: status of the interface for batman-adv * @net_dev: pointer to the net_device * @frag_seqno: last fragment sequence number sent by this interface + * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) * @hardif_obj: kobject of the per interface sysfs "mesh" directory * @refcount: number of contexts the object is used * @batman_adv_ptype: packet type describing packets that should be processed by @@ -76,6 +77,7 @@ struct batadv_hard_iface { char if_status; struct net_device *net_dev; atomic_t frag_seqno; + uint8_t num_bcasts; struct kobject *hardif_obj; atomic_t refcount; struct packet_type batman_adv_ptype; @@ -640,7 +642,7 @@ struct batadv_socket_packet { #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bla_backbone_gw { uint8_t orig[ETH_ALEN]; - short vid; + unsigned short vid; struct hlist_node hash_entry; struct batadv_priv *bat_priv; unsigned long lasttime; @@ -663,7 +665,7 @@ struct batadv_bla_backbone_gw { */ struct batadv_bla_claim { uint8_t addr[ETH_ALEN]; - short vid; + unsigned short vid; struct batadv_bla_backbone_gw *backbone_gw; unsigned long lasttime; struct hlist_node hash_entry; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 0bb3b5982f94..dc8b5d4dd636 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -464,7 +464,7 @@ find_router: goto out; } - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 1625e5793a89..4983340f1943 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -392,12 +392,12 @@ batadv_add_packet(struct batadv_priv *bat_priv, return NULL; len = sizeof(*packet) + vis_info_len; - info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!info->skb_packet) { kfree(info); return NULL; } - skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(info->skb_packet, ETH_HLEN); packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); kref_init(&info->refcount); @@ -697,7 +697,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; struct sk_buff *skb; - uint32_t i; + uint32_t i, res; packet = (struct batadv_vis_packet *)info->skb_packet->data; @@ -724,7 +724,8 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, if (!skb) continue; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res == NET_XMIT_DROP) kfree_skb(skb); } rcu_read_unlock(); @@ -748,7 +749,7 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv, if (!skb) goto out; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) kfree_skb(skb); out: @@ -854,13 +855,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv) if (!bat_priv->vis.my_info) goto err; - len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE; - len += ETH_HLEN + NET_IP_ALIGN; - bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); + len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; + bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL, + len); if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 967312803e41..75f3239130f8 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -22,6 +22,9 @@ #include <asm/uaccess.h> #include "br_private.h" +#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -346,12 +349,10 @@ void br_dev_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX; - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX; + dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | + NETIF_F_HW_VLAN_CTAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; + dev->vlan_features = COMMON_FEATURES; br->dev = dev; spin_lock_init(&br->lock); diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 1644b3e1f947..3a3f371b2841 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -31,7 +31,7 @@ struct notifier_block br_device_notifier = { */ static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; bool changed_addr; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 9878eb8204c5..19c37a4929bc 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -72,13 +72,12 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) } static void -ebt_log_packet(u_int8_t pf, unsigned int hooknum, - const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct nf_loginfo *loginfo, - const char *prefix) +ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, + const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct nf_loginfo *loginfo, + const char *prefix) { unsigned int bitmask; - struct net *net = dev_net(in ? in : out); /* FIXME: Disabled from containers until syslog ns is supported */ if (!net_eq(net, &init_net)) @@ -191,7 +190,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, par->out, &li, "%s", info->prefix); else - ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, + ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, par->out, &li, info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 2ec6c19ff903..518093802d1d 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -131,14 +131,16 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) return skb; } -static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct ebt_ulog_info *uloginfo, const char *prefix) +static void ebt_ulog_packet(struct net *net, unsigned int hooknr, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct ebt_ulog_info *uloginfo, + const char *prefix) { ebt_ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; - struct net *net = dev_net(in ? in : out); struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); unsigned int group = uloginfo->nlgroup; ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group]; @@ -233,7 +235,7 @@ unlock: } /* this function is registered with the netfilter core */ -static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, +static void ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li, const char *prefix) @@ -252,13 +254,15 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); } - ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); + ebt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); } static unsigned int ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) { - ebt_ulog_packet(par->hooknum, skb, par->in, par->out, + struct net *net = dev_net(par->in ? par->in : par->out); + + ebt_ulog_packet(net, par->hooknum, skb, par->in, par->out, par->targinfo, NULL); return EBT_CONTINUE; } diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1f9ece1a9c34..4dca159435cf 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -352,9 +352,9 @@ EXPORT_SYMBOL(caif_enroll_dev); /* notify Caif of device events */ static int caif_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_device_entry *caifd = NULL; struct caif_dev_common *caifdev; struct cfcnfg *cfg; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 942e00a425fd..75ed04b78fa4 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -121,9 +121,9 @@ static struct packet_type caif_usb_type __read_mostly = { }; static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_dev_common common; struct cflayer *layer, *link_support; struct usbnet *usbnet; diff --git a/net/can/af_can.c b/net/can/af_can.c index c4e50852c9f4..3ab8dd2e1282 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -794,9 +794,9 @@ EXPORT_SYMBOL(can_proto_unregister); * af_can notifier to create/remove CAN netdevice specific structs */ static int can_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dev_rcv_lists *d; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8f113e6ff327..46f20bfafc0e 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1350,9 +1350,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, * notification handler for netdevice status changes */ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier); struct sock *sk = &bo->sk; struct bcm_op *op; diff --git a/net/can/gw.c b/net/can/gw.c index 3ee690e8c7d3..2f291f961a17 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -445,9 +445,9 @@ static inline void cgw_unregister_filter(struct cgw_job *gwj) } static int cgw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/can/raw.c b/net/can/raw.c index 1085e65f848e..641e1c895123 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -239,9 +239,9 @@ static int raw_enable_allfilters(struct net_device *dev, struct sock *sk) } static int raw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a3395fdfbd4f..d5953b87918c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1204,6 +1204,7 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, mutex_lock(&osdc->request_mutex); if (req->r_linger) { __unregister_linger_request(osdc, req); + req->r_linger = 0; ceph_osdc_put_request(req); } mutex_unlock(&osdc->request_mutex); @@ -2120,7 +2121,9 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); __register_request(osdc, req); - WARN_ON(req->r_sent); + req->r_sent = 0; + req->r_got_reply = 0; + req->r_completed = 0; rc = __map_request(osdc, req, 0); if (rc < 0) { if (nofail) { diff --git a/net/core/dev.c b/net/core/dev.c index 7229bc30e509..9c18557f93c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1198,9 +1198,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1309,9 +1307,7 @@ static int __dev_close(struct net_device *dev) LIST_HEAD(single); /* Temporarily disable netpoll until the interface is down */ - retval = netpoll_rx_disable(dev); - if (retval) - return retval; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); @@ -1353,14 +1349,11 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); /* Block netpoll rx while the interface is going down */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); dev_close_many(&single); @@ -1368,7 +1361,7 @@ int dev_close(struct net_device *dev) netpoll_rx_enable(dev); } - return ret; + return 0; } EXPORT_SYMBOL(dev_close); @@ -1398,6 +1391,14 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, + struct net_device *dev) +{ + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return nb->notifier_call(nb, val, &info); +} static int dev_boot_phase = 1; @@ -1430,7 +1431,7 @@ int register_netdevice_notifier(struct notifier_block *nb) goto unlock; for_each_net(net) { for_each_netdev(net, dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); err = notifier_to_errno(err); if (err) goto rollback; @@ -1438,7 +1439,7 @@ int register_netdevice_notifier(struct notifier_block *nb) if (!(dev->flags & IFF_UP)) continue; - nb->notifier_call(nb, NETDEV_UP, dev); + call_netdevice_notifier(nb, NETDEV_UP, dev); } } @@ -1454,10 +1455,11 @@ rollback: goto outroll; if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } @@ -1495,10 +1497,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb) for_each_net(net) { for_each_netdev(net, dev) { if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } unlock: @@ -1508,6 +1511,25 @@ unlock: EXPORT_SYMBOL(unregister_netdevice_notifier); /** + * call_netdevice_notifiers_info - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @info: notifier information data + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ + +int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, + struct netdev_notifier_info *info) +{ + ASSERT_RTNL(); + netdev_notifier_info_init(info, dev); + return raw_notifier_call_chain(&netdev_chain, val, info); +} +EXPORT_SYMBOL(call_netdevice_notifiers_info); + +/** * call_netdevice_notifiers - call all network notifier blocks * @val: value passed unmodified to notifier function * @dev: net_device pointer passed unmodified to notifier function @@ -1518,8 +1540,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - ASSERT_RTNL(); - return raw_notifier_call_chain(&netdev_chain, val, dev); + struct netdev_notifier_info info; + + return call_netdevice_notifiers_info(val, dev, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -1701,7 +1724,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb2); if (skb_network_header(skb2) < skb2->data || - skb2->network_header > skb2->tail) { + skb_network_header(skb2) > skb_tail_pointer(skb2)) { net_crit_ratelimited("protocol %04x is buggy, dev %s\n", ntohs(skb2->protocol), dev->name); @@ -3869,7 +3892,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - if (skb->mac_header == skb->tail && + if (skb_mac_header(skb) == skb_tail_pointer(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); @@ -4411,7 +4434,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, else list_add_tail_rcu(&upper->list, &dev->upper_dev_list); dev_hold(upper_dev); - + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); return 0; } @@ -4471,6 +4494,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, list_del_rcu(&upper->list); dev_put(upper_dev); kfree_rcu(upper, rcu); + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -4741,8 +4765,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) } if (dev->flags & IFF_UP && - (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); + (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = changes; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); + } } /** @@ -5276,6 +5305,10 @@ int register_netdevice(struct net_device *dev) */ dev->hw_enc_features |= NETIF_F_SG; + /* Make NETIF_F_SG inheritable to MPLS. + */ + dev->mpls_features |= NETIF_F_SG; + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) @@ -6055,7 +6088,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all, } EXPORT_SYMBOL(netdev_increment_features); -static struct hlist_head *netdev_create_hash(void) +static struct hlist_head * __net_init netdev_create_hash(void) { int i; struct hlist_head *hash; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index c013f38482a1..6cda4e2c2132 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -39,6 +39,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->refcount = 1; ha->global_use = global; ha->synced = sync; + ha->sync_cnt = 0; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -66,7 +67,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, } if (sync) { if (ha->synced) - return 0; + return -EEXIST; else ha->synced = true; } @@ -139,10 +140,13 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, false, true); - if (err) + if (err && err != -EEXIST) return err; - ha->sync_cnt++; - ha->refcount++; + + if (!err) { + ha->sync_cnt++; + ha->refcount++; + } return 0; } @@ -159,7 +163,8 @@ static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, if (err) return; ha->sync_cnt--; - __hw_addr_del_entry(from_list, ha, false, true); + /* address on from list is not marked synced */ + __hw_addr_del_entry(from_list, ha, false, false); } static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, @@ -796,7 +801,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) return -EINVAL; netif_addr_lock_nested(to); - err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); + err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d23b6682f4e9..5e78d44333b9 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb, } static int dropmon_net_event(struct notifier_block *ev_block, - unsigned long event, void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *tmp; diff --git a/net/core/dst.c b/net/core/dst.c index df9cc810ec8e..ca4231ec7347 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dst_entry *dst, *last = NULL; switch (event) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 22efdaa76ebf..cd23d314d68a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", @@ -1413,7 +1414,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } -/* The main entry point in this file. Called from net/core/dev.c */ +/* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d5a9f8ead0d8..21735440c44a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -705,9 +705,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) static int fib_rules_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct fib_rules_ops *ops; diff --git a/net/core/iovec.c b/net/core/iovec.c index 7e7aeb01de45..de178e462682 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -75,31 +75,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a /* * Copy kernel to iovec. Returns -EFAULT on error. - * - * Note: this modifies the original iovec. - */ - -int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) -{ - while (len > 0) { - if (iov->iov_len) { - int copy = min_t(unsigned int, iov->iov_len, len); - if (copy_to_user(iov->iov_base, kdata, copy)) - return -EFAULT; - kdata += copy; - len -= copy; - iov->iov_len -= copy; - iov->iov_base += copy; - } - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_toiovec); - -/* - * Copy kernel to iovec. Returns -EFAULT on error. */ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, @@ -125,31 +100,6 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, EXPORT_SYMBOL(memcpy_toiovecend); /* - * Copy iovec to kernel. Returns -EFAULT on error. - * - * Note: this modifies the original iovec. - */ - -int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) -{ - while (len > 0) { - if (iov->iov_len) { - int copy = min_t(unsigned int, len, iov->iov_len); - if (copy_from_user(kdata, iov->iov_base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov->iov_base += copy; - iov->iov_len -= copy; - } - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovec); - -/* * Copy iovec from kernel. Returns -EFAULT on error. */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cec074be8c43..03c8ec3edc72 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -247,7 +247,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } -int netpoll_rx_disable(struct net_device *dev) +void netpoll_rx_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -257,7 +257,6 @@ int netpoll_rx_disable(struct net_device *dev) if (ni) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); - return 0; } EXPORT_SYMBOL(netpoll_rx_disable); @@ -690,25 +689,20 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo send_skb->dev = skb->dev; skb_reset_network_header(send_skb); - skb_put(send_skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(send_skb); - + hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = 255; hdr->saddr = *saddr; hdr->daddr = *daddr; - send_skb->transport_header = send_skb->tail; - skb_put(send_skb, size); - - icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; icmp6h->icmp6_router = 0; icmp6h->icmp6_solicited = 1; - target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); + + target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); *target = msg->target; icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, IPPROTO_ICMPV6, diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 0777d0aa18c3..e533259dce3c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -261,7 +261,7 @@ struct cgroup_subsys net_prio_subsys = { static int netprio_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netprio_map *old; /* diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 11f2704c3810..303412d8332b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1921,7 +1921,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); if (pn->pktgen_exiting) @@ -2708,15 +2708,15 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IP); } - skb->network_header = skb->tail; - skb->transport_header = skb->network_header + sizeof(struct iphdr); - skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ip_hdr(skb); - udph = udp_hdr(skb); - memcpy(eth, pkt_dev->hh, 12); *(__be16 *) & eth[12] = protocol; @@ -2746,8 +2746,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = 0; iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; pktgen_finalize_skb(pkt_dev, skb, datalen); @@ -2822,13 +2820,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - skb->network_header = skb->tail; - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ipv6_hdr(skb); - udph = udp_hdr(skb); memcpy(eth, pkt_dev->hh, 12); *(__be16 *) ð[12] = protocol; @@ -2863,8 +2862,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, iph->daddr = pkt_dev->cur_in6_daddr; iph->saddr = pkt_dev->cur_in6_saddr; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a08bd2b7fe3f..49c14451d8ab 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2667,7 +2667,7 @@ static void rtnetlink_rcv(struct sk_buff *skb) static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_UP: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d6298914f4e7..73f57a0e1523 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -195,13 +195,11 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = NULL; + skb->head = NULL; skb->truesize = sizeof(struct sk_buff); atomic_set(&skb->users, 1); -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; out: return skb; } @@ -275,10 +273,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -344,10 +340,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -611,7 +605,7 @@ static void skb_release_head_state(struct sk_buff *skb) static void skb_release_all(struct sk_buff *skb) { skb_release_head_state(skb); - if (likely(skb->data)) + if (likely(skb->head)) skb_release_data(skb); } diff --git a/net/core/sock.c b/net/core/sock.c index 6ba327da79e1..88868a9d21da 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -210,7 +210,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_MAX" + "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -226,7 +226,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_MAX" + "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -242,7 +242,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_MAX" + "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" }; /* diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index c21f200eed93..dd4d506ef923 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2078,9 +2078,9 @@ out_err: } static int dn_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 55e1fd5b3e56..3b9d5f20bd1c 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1352,10 +1352,9 @@ static inline void lowpan_netlink_fini(void) } static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); LIST_HEAD(del_list); struct lowpan_dev_record *entry, *tmp; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8603ca827104..37cf1a6ea3ad 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -9,10 +9,7 @@ config IP_MULTICAST intend to participate in the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. More information about the MBONE is on the WWW at - <http://www.savetz.com/mbone/>. Information about the multicast - capabilities of the various network cards is contained in - <file:Documentation/networking/multicast.txt>. For most people, it's - safe to say N. + <http://www.savetz.com/mbone/>. For most people, it's safe to say N. config IP_ADVANCED_ROUTER bool "IP: advanced router" @@ -223,10 +220,8 @@ config IP_MROUTE packets that have several destination addresses. It is needed on the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. In order to do that, you would most - likely run the program mrouted. Information about the multicast - capabilities of the various network cards is contained in - <file:Documentation/networking/multicast.txt>. If you haven't heard - about it, you don't need it. + likely run the program mrouted. If you haven't heard about it, you + don't need it. config IP_MROUTE_MULTIPLE_TABLES bool "IP: multicast policy routing" diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d01be2a3ae53..9c090c7daea1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1295,6 +1295,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | 0))) goto out; @@ -1384,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out_unlock; id = ntohl(*(__be32 *)&iph->id); - flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1406,6 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | + ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2e7f1948216f..717902669d2f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 247ec1951c35..4429b013f269 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1234,13 +1234,19 @@ out: static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); rt_cache_flush(dev_net(dev)); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&arp_tbl, dev); + break; default: break; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b7..b047e2d8a614 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1333,7 +1333,7 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4cfe34d4cc96..ab3d814bc80a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7629a209f9d..05a4888dede9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1038,7 +1038,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; struct net *net = dev_net(dev); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 76e10b47e053..5f7d11a45871 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -482,7 +482,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; - struct icmp_bxm icmp_param; + struct icmp_bxm *icmp_param; struct rtable *rt = skb_rtable(skb_in); struct ipcm_cookie ipc; struct flowi4 fl4; @@ -503,7 +503,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph = ip_hdr(skb_in); if ((u8 *)iph < skb_in->head || - (skb_in->network_header + sizeof(*iph)) > skb_in->tail) + (skb_network_header(skb_in) + sizeof(*iph)) > + skb_tail_pointer(skb_in)) goto out; /* @@ -557,9 +558,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } + icmp_param = kmalloc(sizeof(*icmp_param), GFP_ATOMIC); + if (!icmp_param) + return; + sk = icmp_xmit_lock(net); if (sk == NULL) - return; + goto out_free; /* * Construct source address and options. @@ -585,7 +590,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) + if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -593,19 +598,19 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) * Prepare data for ICMP header. */ - icmp_param.data.icmph.type = type; - icmp_param.data.icmph.code = code; - icmp_param.data.icmph.un.gateway = info; - icmp_param.data.icmph.checksum = 0; - icmp_param.skb = skb_in; - icmp_param.offset = skb_network_offset(skb_in); + icmp_param->data.icmph.type = type; + icmp_param->data.icmph.code = code; + icmp_param->data.icmph.un.gateway = info; + icmp_param->data.icmph.checksum = 0; + icmp_param->skb = skb_in; + icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; - ipc.opt = &icmp_param.replyopts.opt; + ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, - type, code, &icmp_param); + type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -617,19 +622,21 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) room = dst_mtu(&rt->dst); if (room > 576) room = 576; - room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; + room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); - icmp_param.data_len = skb_in->len - icmp_param.offset; - if (icmp_param.data_len > room) - icmp_param.data_len = room; - icmp_param.head_len = sizeof(struct icmphdr); + icmp_param->data_len = skb_in->len - icmp_param->offset; + if (icmp_param->data_len > room) + icmp_param->data_len = room; + icmp_param->head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); + icmp_push_reply(icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); +out_free: + kfree(icmp_param); out:; } EXPORT_SYMBOL(icmp_send); @@ -657,7 +664,8 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * ICMP_PARAMETERPROB. */ static void icmp_unreach(struct sk_buff *skb) @@ -939,7 +947,8 @@ error: void icmp_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int offset = iph->ihl<<2; + struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); @@ -949,7 +958,7 @@ void icmp_err(struct sk_buff *skb, u32 info) * triggered by ICMP_ECHOREPLY which sent from kernel. */ if (icmph->type != ICMP_ECHOREPLY) { - ping_err(skb, info); + ping_err(skb, offset, info); return; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d8c232794bcb..450f625361e4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -363,7 +363,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) static int igmpv3_sendpack(struct sk_buff *skb) { struct igmphdr *pig = igmp_hdr(skb); - const int igmplen = skb->tail - skb->transport_header; + const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb); pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c625e4dad4b0..a982657d05e7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -235,7 +235,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) */ struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn; - const struct iphdr *iph = (const struct iphdr *)skb->data; + const struct iphdr *iph; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; @@ -281,6 +281,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) else itn = net_generic(net, ipgre_net_id); + iph = (const struct iphdr *)skb->data; t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, iph->daddr, iph->saddr, tpi.key); @@ -428,7 +429,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, return; } - ip_tunnel_xmit(skb, dev, tnl_params); + ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } static netdev_tx_t ipgre_xmit(struct sk_buff *skb, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e4147ec1665a..7c79cf8ad449 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -487,7 +487,7 @@ drop: EXPORT_SYMBOL_GPL(ip_tunnel_rcv); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const struct iphdr *tnl_params) + const struct iphdr *tnl_params, const u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; @@ -503,6 +503,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -658,7 +659,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); @@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = df; - iph->protocol = tnl_params->protocol; + iph->protocol = protocol; iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 59cb8c769056..826be4cb482a 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 77bfcce64fe5..9df7ecd393f2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -222,7 +222,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->encapsulation = 1; } - ip_tunnel_xmit(skb, dev, tiph); + ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); return NETDEV_TX_OK; tx_error: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d9610ae7855..df97f0ac1a1c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -945,6 +945,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct igmpmsg *msg; struct sock *mroute_sk; int ret; + unsigned long tail_offset; #ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) @@ -980,7 +981,12 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Copy the IP header */ - skb->network_header = skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(skb); + return -EINVAL; + } + skb_set_network_header(skb, tail_offset); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, ihl); ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ @@ -1609,7 +1615,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr_table *mrt; struct vif_device *v; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 5d5d4d1be9c2..30e4de940567 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -108,7 +108,7 @@ static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { @@ -129,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this, void *ptr) { struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - return masq_device_event(this, event, dev); + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_dev_notifier = { diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index c1953d07e2f4..57c671152c42 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -162,7 +162,8 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) return skb; } -static void ipt_ulog_packet(unsigned int hooknum, +static void ipt_ulog_packet(struct net *net, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -174,7 +175,6 @@ static void ipt_ulog_packet(unsigned int hooknum, size_t size, copy_len; struct nlmsghdr *nlh; struct timeval tv; - struct net *net = dev_net(in ? in : out); struct ulog_net *ulog = ulog_pernet(net); /* ffs == find first bit set, necessary because userspace @@ -231,8 +231,10 @@ static void ipt_ulog_packet(unsigned int hooknum, put_unaligned(tv.tv_usec, &pm->timestamp_usec); put_unaligned(skb->mark, &pm->mark); pm->hook = hooknum; - if (prefix != NULL) - strncpy(pm->prefix, prefix, sizeof(pm->prefix)); + if (prefix != NULL) { + strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1); + pm->prefix[sizeof(pm->prefix) - 1] = '\0'; + } else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); else @@ -291,12 +293,15 @@ alloc_failure: static unsigned int ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) { - ipt_ulog_packet(par->hooknum, skb, par->in, par->out, + struct net *net = dev_net(par->in ? par->in : par->out); + + ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out, par->targinfo, NULL); return XT_CONTINUE; } -static void ipt_logfn(u_int8_t pf, +static void ipt_logfn(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -318,7 +323,7 @@ static void ipt_logfn(u_int8_t pf, strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); } - ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); + ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); } static int ulog_tg_check(const struct xt_tgchk_param *par) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7d93d62cd5fd..1f1b2dd90277 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -33,7 +33,6 @@ #include <linux/netdevice.h> #include <net/snmp.h> #include <net/ip.h> -#include <net/ipv6.h> #include <net/icmp.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -46,8 +45,18 @@ #include <net/inet_common.h> #include <net/checksum.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <linux/in6.h> +#include <linux/icmpv6.h> +#include <net/addrconf.h> +#include <net/ipv6.h> +#include <net/transp_v6.h> +#endif -static struct ping_table ping_table; + +struct ping_table ping_table; +struct pingv6_ops pingv6_ops; +EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; @@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma pr_debug("hash(%d) = %d\n", num, res); return res; } +EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned int num) @@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } -static int ping_v4_get_port(struct sock *sk, unsigned short ident) +int ping_get_port(struct sock *sk, unsigned short ident) { struct hlist_nulls_node *node; struct hlist_nulls_head *hlist; @@ -103,6 +113,10 @@ next_port: ping_portaddr_for_each_entry(sk2, node, hlist) { isk2 = inet_sk(sk2); + /* BUG? Why is this reuse and not reuseaddr? ping.c + * doesn't turn off SO_REUSEADDR, and it doesn't expect + * that other ping processes can steal its packets. + */ if ((isk2->inet_num == ident) && (sk2 != sk) && (!sk2->sk_reuse || !sk->sk_reuse)) @@ -125,17 +139,18 @@ fail: write_unlock_bh(&ping_table.lock); return 1; } +EXPORT_SYMBOL_GPL(ping_get_port); -static void ping_v4_hash(struct sock *sk) +void ping_hash(struct sock *sk) { - pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); + pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ } -static void ping_v4_unhash(struct sock *sk) +void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); - pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); @@ -146,31 +161,61 @@ static void ping_v4_unhash(struct sock *sk) write_unlock_bh(&ping_table.lock); } } +EXPORT_SYMBOL_GPL(ping_unhash); -static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, - u16 ident, int dif) +static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; + int dif = skb->dev->ifindex; + + if (skb->protocol == htons(ETH_P_IP)) { + pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", + (int)ident, &ip_hdr(skb)->daddr, dif); +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", + (int)ident, &ipv6_hdr(skb)->daddr, dif); +#endif + } - pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", - (int)ident, &daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); - pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk, - (int)isk->inet_num, &isk->inet_rcv_saddr, - sk->sk_bound_dev_if); - pr_debug("iterate\n"); if (isk->inet_num != ident) continue; - if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr) - continue; + + if (skb->protocol == htons(ETH_P_IP) && + sk->sk_family == AF_INET) { + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, + (int) isk->inet_num, &isk->inet_rcv_saddr, + sk->sk_bound_dev_if); + + if (isk->inet_rcv_saddr && + isk->inet_rcv_saddr != ip_hdr(skb)->daddr) + continue; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6) && + sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, + (int) isk->inet_num, + &inet6_sk(sk)->rcv_saddr, + sk->sk_bound_dev_if); + + if (!ipv6_addr_any(&np->rcv_saddr) && + !ipv6_addr_equal(&np->rcv_saddr, + &ipv6_hdr(skb)->daddr)) + continue; +#endif + } + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; @@ -200,7 +245,7 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, } -static int ping_init_sock(struct sock *sk) +int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); @@ -225,8 +270,9 @@ static int ping_init_sock(struct sock *sk) return -EACCES; } +EXPORT_SYMBOL_GPL(ping_init_sock); -static void ping_close(struct sock *sk, long timeout) +void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); @@ -234,36 +280,122 @@ static void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } +EXPORT_SYMBOL_GPL(ping_close); + +/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ +int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { + struct net *net = sock_net(sk); + if (sk->sk_family == AF_INET) { + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + int chk_addr_ret; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", + sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); + + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) + chk_addr_ret = RTN_LOCAL; + + if ((sysctl_ip_nonlocal_bind == 0 && + isk->freebind == 0 && isk->transparent == 0 && + chk_addr_ret != RTN_LOCAL) || + chk_addr_ret == RTN_MULTICAST || + chk_addr_ret == RTN_BROADCAST) + return -EADDRNOTAVAIL; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; + int addr_type, scoped, has_addr; + struct net_device *dev = NULL; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", + sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); + + addr_type = ipv6_addr_type(&addr->sin6_addr); + scoped = __ipv6_addr_needs_scope_id(addr_type); + if ((addr_type != IPV6_ADDR_ANY && + !(addr_type & IPV6_ADDR_UNICAST)) || + (scoped && !addr->sin6_scope_id)) + return -EINVAL; + + rcu_read_lock(); + if (addr->sin6_scope_id) { + dev = dev_get_by_index_rcu(net, addr->sin6_scope_id); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } + has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, + scoped); + rcu_read_unlock(); + + if (!(isk->freebind || isk->transparent || has_addr || + addr_type == IPV6_ADDR_ANY)) + return -EADDRNOTAVAIL; + + if (scoped) + sk->sk_bound_dev_if = addr->sin6_scope_id; +#endif + } else { + return -EAFNOSUPPORT; + } + return 0; +} + +void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +{ + if (saddr->sa_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) saddr; + isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (saddr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; + struct ipv6_pinfo *np = inet6_sk(sk); + np->rcv_saddr = np->saddr = addr->sin6_addr; +#endif + } +} +void ping_clear_saddr(struct sock *sk, int dif) +{ + sk->sk_bound_dev_if = dif; + if (sk->sk_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + isk->inet_rcv_saddr = isk->inet_saddr = 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&np->saddr, 0, sizeof(np->saddr)); +#endif + } +} /* * We need our own bind because there are no privileged id's == local ports. * Moreover, we don't allow binding to multi- and broadcast addresses. */ -static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *isk = inet_sk(sk); unsigned short snum; - int chk_addr_ret; int err; + int dif = sk->sk_bound_dev_if; - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n", - sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); - - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) - chk_addr_ret = RTN_LOCAL; - - if ((sysctl_ip_nonlocal_bind == 0 && - isk->freebind == 0 && isk->transparent == 0 && - chk_addr_ret != RTN_LOCAL) || - chk_addr_ret == RTN_MULTICAST || - chk_addr_ret == RTN_BROADCAST) - return -EADDRNOTAVAIL; + err = ping_check_bind_addr(sk, isk, uaddr, addr_len); + if (err) + return err; lock_sock(sk); @@ -272,42 +404,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; err = -EADDRINUSE; - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; - snum = ntohs(addr->sin_port); - if (ping_v4_get_port(sk, snum) != 0) { - isk->inet_saddr = isk->inet_rcv_saddr = 0; + ping_set_saddr(sk, uaddr); + snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port); + if (ping_get_port(sk, snum) != 0) { + ping_clear_saddr(sk, dif); goto out; } - pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n", + pr_debug("after bind(): num = %d, dif = %d\n", (int)isk->inet_num, - &isk->inet_rcv_saddr, (int)sk->sk_bound_dev_if); err = 0; - if (isk->inet_rcv_saddr) + if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || + (sk->sk_family == AF_INET6 && + !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; isk->inet_sport = htons(isk->inet_num); isk->inet_daddr = 0; isk->inet_dport = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); +#endif + sk_dst_reset(sk); out: release_sock(sk); pr_debug("ping_v4_bind -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_bind); /* * Is this a supported type of ICMP message? */ -static inline int ping_supported(int type, int code) +static inline int ping_supported(int family, int type, int code) { - if (type == ICMP_ECHO && code == 0) - return 1; - return 0; + return (family == AF_INET && type == ICMP_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); } /* @@ -315,30 +455,42 @@ static inline int ping_supported(int type, int code) * sort of error condition. */ -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); - -void ping_err(struct sk_buff *skb, u32 info) +void ping_err(struct sk_buff *skb, int offset, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int family; + struct icmphdr *icmph; struct inet_sock *inet_sock; - int type = icmp_hdr(skb)->type; - int code = icmp_hdr(skb)->code; + int type; + int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; int err; + if (skb->protocol == htons(ETH_P_IP)) { + family = AF_INET; + type = icmp_hdr(skb)->type; + code = icmp_hdr(skb)->code; + icmph = (struct icmphdr *)(skb->data + offset); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + family = AF_INET6; + type = icmp6_hdr(skb)->icmp6_type; + code = icmp6_hdr(skb)->icmp6_code; + icmph = (struct icmphdr *) (skb->data + offset); + } else { + BUG(); + } + /* We assume the packet has already been checked by icmp_unreach */ - if (!ping_supported(icmph->type, icmph->code)) + if (!ping_supported(family, icmph->type, icmph->code)) return; - pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type, - code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n", + skb->protocol, type, code, ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); - sk = ping_v4_lookup(net, iph->daddr, iph->saddr, - ntohs(icmph->un.echo.id), skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk == NULL) { pr_debug("no socket, dropping\n"); return; /* No socket for error */ @@ -349,72 +501,83 @@ void ping_err(struct sk_buff *skb, u32 info) harderr = 0; inet_sock = inet_sk(sk); - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - /* This is not a real error but ping wants to see it. - * Report it with some fake errno. */ - err = EREMOTEIO; - break; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - ipv4_sk_update_pmtu(skb, sk, info); - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; + if (skb->protocol == htons(ETH_P_IP)) { + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + /* This is not a real error but ping wants to see it. + * Report it with some fake errno. + */ + err = EREMOTEIO; + break; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); + if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + case ICMP_REDIRECT: + /* See ICMP_SOURCE_QUENCH */ + ipv4_sk_redirect(skb, sk); + err = EREMOTEIO; + break; } - break; - case ICMP_REDIRECT: - /* See ICMP_SOURCE_QUENCH */ - ipv4_sk_redirect(skb, sk); - err = EREMOTEIO; - break; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + harderr = pingv6_ops.icmpv6_err_convert(type, code, &err); +#endif } /* * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ - if (!inet_sock->recverr) { + if ((family == AF_INET && !inet_sock->recverr) || + (family == AF_INET6 && !inet6_sk(sk)->recverr)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { - ip_icmp_error(sk, skb, err, 0 /* no remote port */, - info, (u8 *)icmph); + if (family == AF_INET) { + ip_icmp_error(sk, skb, err, 0 /* no remote port */, + info, (u8 *)icmph); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + pingv6_ops.ipv6_icmp_error(sk, skb, err, 0, + info, (u8 *)icmph); +#endif + } } sk->sk_err = err; sk->sk_error_report(sk); out: sock_put(sk); } +EXPORT_SYMBOL_GPL(ping_err); /* - * Copy and checksum an ICMP Echo packet from user space into a buffer. + * Copy and checksum an ICMP Echo packet from user space into a buffer + * starting from the payload. */ -struct pingfakehdr { - struct icmphdr icmph; - struct iovec *iov; - __wsum wcheck; -}; - -static int ping_getfrag(void *from, char *to, - int offset, int fraglen, int odd, struct sk_buff *skb) +int ping_getfrag(void *from, char *to, + int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = (struct pingfakehdr *)from; @@ -425,20 +588,33 @@ static int ping_getfrag(void *from, char *to, pfh->iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; + } else if (offset < sizeof(struct icmphdr)) { + BUG(); + } else { + if (csum_partial_copy_fromiovecend + (to, pfh->iov, offset - sizeof(struct icmphdr), + fraglen, &pfh->wcheck)) + return -EFAULT; + } - return 0; +#if IS_ENABLED(CONFIG_IPV6) + /* For IPv6, checksum each skb as we go along, as expected by + * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in + * wcheck, it will be finalized in ping_v4_push_pending_frames. + */ + if (pfh->family == AF_INET6) { + skb->csum = pfh->wcheck; + skb->ip_summed = CHECKSUM_NONE; + pfh->wcheck = 0; } - if (offset < sizeof(struct icmphdr)) - BUG(); - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) - return -EFAULT; +#endif + return 0; } +EXPORT_SYMBOL_GPL(ping_getfrag); -static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, - struct flowi4 *fl4) +static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, + struct flowi4 *fl4) { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); @@ -450,24 +626,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, return ip_push_pending_frames(sk, fl4); } -static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct net *net = sock_net(sk); - struct flowi4 fl4; - struct inet_sock *inet = inet_sk(sk); - struct ipcm_cookie ipc; - struct icmphdr user_icmph; - struct pingfakehdr pfh; - struct rtable *rt = NULL; - struct ip_options_data opt_copy; - int free = 0; - __be32 saddr, daddr, faddr; - u8 tos; - int err; - - pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); - +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len) { + u8 type, code; if (len > 0xFFFF) return -EMSGSIZE; @@ -482,15 +643,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* * Fetch the ICMP header provided by the userland. - * iovec is modified! + * iovec is modified! The ICMP header is consumed. */ - - if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov, - sizeof(struct icmphdr))) + if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len)) return -EFAULT; - if (!ping_supported(user_icmph.type, user_icmph.code)) + + if (family == AF_INET) { + type = ((struct icmphdr *) user_icmph)->type; + code = ((struct icmphdr *) user_icmph)->code; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + type = ((struct icmp6hdr *) user_icmph)->icmp6_type; + code = ((struct icmp6hdr *) user_icmph)->icmp6_code; +#endif + } else { + BUG(); + } + + if (!ping_supported(family, type, code)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(ping_common_sendmsg); + +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct net *net = sock_net(sk); + struct flowi4 fl4; + struct inet_sock *inet = inet_sk(sk); + struct ipcm_cookie ipc; + struct icmphdr user_icmph; + struct pingfakehdr pfh; + struct rtable *rt = NULL; + struct ip_options_data opt_copy; + int free = 0; + __be32 saddr, daddr, faddr; + u8 tos; + int err; + + pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + /* * Get and verify the address. */ @@ -595,13 +794,14 @@ back_from_confirm: pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; pfh.iov = msg->msg_iov; pfh.wcheck = 0; + pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else - err = ping_push_pending_frames(sk, &pfh, &fl4); + err = ping_v4_push_pending_frames(sk, &pfh, &fl4); release_sock(sk); out: @@ -622,11 +822,13 @@ do_confirm: goto out; } -static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + int family = sk->sk_family; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct sk_buff *skb; int copied, err; @@ -636,11 +838,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); + if (addr_len) { + if (family == AF_INET) + *addr_len = sizeof(*sin); + else if (family == AF_INET6 && addr_len) + *addr_len = sizeof(*sin6); + } - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { + return ip_recv_error(sk, msg, len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + return pingv6_ops.ipv6_recv_error(sk, msg, len); +#endif + } + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -659,15 +872,40 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); - /* Copy the address. */ - if (sin) { + /* Copy the address and add cmsg data. */ + if (family == AF_INET) { + sin = (struct sockaddr_in *) msg->msg_name; sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); + +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *ip6 = ipv6_hdr(skb); + sin6 = (struct sockaddr_in6 *) msg->msg_name; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; + + sin6->sin6_flowinfo = 0; + if (np->sndflow) + sin6->sin6_flowinfo = ip6_flowinfo(ip6); + + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); +#endif + } else { + BUG(); } - if (isk->cmsg_flags) - ip_cmsg_recv(msg, skb); + err = copied; done: @@ -676,8 +914,9 @@ out: pr_debug("ping_recvmsg -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_recvmsg); -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); @@ -688,6 +927,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); /* @@ -698,10 +938,7 @@ void ping_rcv(struct sk_buff *skb) { struct sock *sk; struct net *net = dev_net(skb->dev); - struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph = icmp_hdr(skb); - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; /* We assume the packet has already been checked by icmp_rcv */ @@ -711,8 +948,7 @@ void ping_rcv(struct sk_buff *skb) /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); - sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), - skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { pr_debug("rcv on socket %p\n", sk); ping_queue_rcv_skb(sk, skb_get(skb)); @@ -723,6 +959,7 @@ void ping_rcv(struct sk_buff *skb) /* We're called from icmp_rcv(). kfree_skb() is done there. */ } +EXPORT_SYMBOL_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -733,14 +970,14 @@ struct proto ping_prot = { .disconnect = udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .sendmsg = ping_sendmsg, + .sendmsg = ping_v4_sendmsg, .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, - .hash = ping_v4_hash, - .unhash = ping_v4_unhash, - .get_port = ping_v4_get_port, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); @@ -764,7 +1001,8 @@ static struct sock *ping_get_first(struct seq_file *seq, int start) continue; sk_nulls_for_each(sk, node, hslot) { - if (net_eq(sock_net(sk), net)) + if (net_eq(sock_net(sk), net) && + sk->sk_family == state->family) goto found; } } @@ -797,17 +1035,24 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : sk; } -static void *ping_seq_start(struct seq_file *seq, loff_t *pos) +void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) { struct ping_iter_state *state = seq->private; state->bucket = 0; + state->family = family; read_lock_bh(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } +EXPORT_SYMBOL_GPL(ping_seq_start); + +static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET); +} -static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) +void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; @@ -819,13 +1064,15 @@ static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } +EXPORT_SYMBOL_GPL(ping_seq_next); -static void ping_seq_stop(struct seq_file *seq, void *v) +void ping_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&ping_table.lock); } +EXPORT_SYMBOL_GPL(ping_seq_stop); -static void ping_format_sock(struct sock *sp, struct seq_file *f, +static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) { struct inet_sock *inet = inet_sk(sp); @@ -846,7 +1093,7 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, atomic_read(&sp->sk_drops), len); } -static int ping_seq_show(struct seq_file *seq, void *v) +static int ping_v4_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", @@ -857,72 +1104,86 @@ static int ping_seq_show(struct seq_file *seq, void *v) struct ping_iter_state *state = seq->private; int len; - ping_format_sock(v, seq, state->bucket, &len); + ping_v4_format_sock(v, seq, state->bucket, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } return 0; } -static const struct seq_operations ping_seq_ops = { - .show = ping_seq_show, - .start = ping_seq_start, +static const struct seq_operations ping_v4_seq_ops = { + .show = ping_v4_seq_show, + .start = ping_v4_seq_start, .next = ping_seq_next, .stop = ping_seq_stop, }; static int ping_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ping_seq_ops, + struct ping_seq_afinfo *afinfo = PDE_DATA(inode); + return seq_open_net(inode, file, &afinfo->seq_ops, sizeof(struct ping_iter_state)); } -static const struct file_operations ping_seq_fops = { +const struct file_operations ping_seq_fops = { .open = ping_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_net, }; +EXPORT_SYMBOL_GPL(ping_seq_fops); + +static struct ping_seq_afinfo ping_v4_seq_afinfo = { + .name = "icmp", + .family = AF_INET, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v4_seq_start, + .show = ping_v4_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; -static int ping_proc_register(struct net *net) +int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) { struct proc_dir_entry *p; - int rc = 0; - - p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops); + p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, + afinfo->seq_fops, afinfo); if (!p) - rc = -ENOMEM; - return rc; + return -ENOMEM; + return 0; } +EXPORT_SYMBOL_GPL(ping_proc_register); -static void ping_proc_unregister(struct net *net) +void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo) { - remove_proc_entry("icmp", net->proc_net); + remove_proc_entry(afinfo->name, net->proc_net); } +EXPORT_SYMBOL_GPL(ping_proc_unregister); - -static int __net_init ping_proc_init_net(struct net *net) +static int __net_init ping_v4_proc_init_net(struct net *net) { - return ping_proc_register(net); + return ping_proc_register(net, &ping_v4_seq_afinfo); } -static void __net_exit ping_proc_exit_net(struct net *net) +static void __net_exit ping_v4_proc_exit_net(struct net *net) { - ping_proc_unregister(net); + ping_proc_unregister(net, &ping_v4_seq_afinfo); } -static struct pernet_operations ping_net_ops = { - .init = ping_proc_init_net, - .exit = ping_proc_exit_net, +static struct pernet_operations ping_v4_net_ops = { + .init = ping_v4_proc_init_net, + .exit = ping_v4_proc_exit_net, }; int __init ping_proc_init(void) { - return register_pernet_subsys(&ping_net_ops); + return register_pernet_subsys(&ping_v4_net_ops); } void ping_proc_exit(void) { - unregister_pernet_subsys(&ping_net_ops); + unregister_pernet_subsys(&ping_v4_net_ops); } #endif diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 550781a17b34..198ea596f2d9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -594,11 +594,25 @@ static inline u32 fnhe_hashfun(__be32 daddr) return hval & (FNHE_HASH_SIZE - 1); } +static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) +{ + rt->rt_pmtu = fnhe->fnhe_pmtu; + rt->dst.expires = fnhe->fnhe_expires; + + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; + rt->rt_uses_gateway = 1; + } +} + static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, u32 pmtu, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; + struct rtable *rt; + unsigned int i; int depth; u32 hval = fnhe_hashfun(daddr); @@ -627,8 +641,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_gw = gw; if (pmtu) { fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = expires; + fnhe->fnhe_expires = max(1UL, expires); } + /* Update all cached dsts too */ + rt = rcu_dereference(fnhe->fnhe_rth); + if (rt) + fill_route_from_fnhe(rt, fnhe); } else { if (depth > FNHE_RECLAIM_DEPTH) fnhe = fnhe_oldest(hash); @@ -640,10 +658,23 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } + fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_expires = expires; + + /* Exception created; mark the cached routes for the nexthop + * stale, so anyone caching it rechecks if this exception + * applies to them. + */ + for_each_possible_cpu(i) { + struct rtable __rcu **prt; + prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + rt = rcu_dereference(*prt); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + } } fnhe->fnhe_stamp = jiffies; @@ -737,10 +768,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf { struct rtable *rt; struct flowi4 fl4; + const struct iphdr *iph = (const struct iphdr *) skb->data; + int oif = skb->dev->ifindex; + u8 tos = RT_TOS(iph->tos); + u8 prot = iph->protocol; + u32 mark = skb->mark; rt = (struct rtable *) dst; - ip_rt_build_flow_key(&fl4, sk, skb); + __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -917,12 +953,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); - } + if (rt->rt_pmtu == mtu && + time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) + return; rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { @@ -1063,11 +1096,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. * - * When a PMTU/redirect information update invalidates a - * route, this is indicated by setting obsolete to - * DST_OBSOLETE_KILL. + * When a PMTU/redirect information update invalidates a route, + * this is indicated by setting obsolete to DST_OBSOLETE_KILL or + * DST_OBSOLETE_DEAD by dst_free(). */ - if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) + if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; return dst; } @@ -1209,26 +1242,17 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { + int genid = fnhe_genid(dev_net(rt->dst.dev)); struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); - if (orig && rt_is_expired(orig)) { + + if (fnhe->fnhe_genid != genid) { + fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; } - if (fnhe->fnhe_pmtu) { - unsigned long expires = fnhe->fnhe_expires; - unsigned long diff = expires - jiffies; - - if (time_before(jiffies, expires)) { - rt->rt_pmtu = fnhe->fnhe_pmtu; - dst_set_expires(&rt->dst, diff); - } - } - if (fnhe->fnhe_gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = fnhe->fnhe_gw; - rt->rt_uses_gateway = 1; - } else if (!rt->rt_gateway) + fill_route_from_fnhe(rt, fnhe); + if (!rt->rt_gateway) rt->rt_gateway = daddr; rcu_assign_pointer(fnhe->fnhe_rth, rt); @@ -2428,8 +2452,11 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = (struct net *)__ctl->extra1; + if (write) { - rt_cache_flush((struct net *)__ctl->extra1); + rt_cache_flush(net); + fnhe_genid_bump(net); return 0; } @@ -2604,6 +2631,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->rt_genid, 0); + atomic_set(&net->fnhe_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 53d9c120fbb8..b5d4ad988053 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -436,6 +436,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); + sock_rps_record_flow(sk); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); @@ -2887,6 +2889,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, unsigned int mss; struct sk_buff *gso_skb = skb; __sum16 newcheck; + bool ooo_okay, copy_destructor; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -2916,6 +2919,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) @@ -2927,10 +2931,18 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, goto out; } + copy_destructor = gso_skb->destructor == tcp_wfree; + ooo_okay = gso_skb->ooo_okay; + /* All segments but the first should have ooo_okay cleared */ + skb->ooo_okay = 0; + segs = skb_segment(skb, features); if (IS_ERR(segs)) goto out; + /* Only first segment might have ooo_okay set */ + segs->ooo_okay = ooo_okay; + delta = htonl(oldlen + (thlen + mss)); skb = segs; @@ -2950,6 +2962,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, thlen, skb->csum)); seq += mss; + if (copy_destructor) { + skb->destructor = gso_skb->destructor; + skb->sk = gso_skb->sk; + /* {tcp|sock}_wfree() use exact truesize accounting : + * sum(skb->truesize) MUST be exactly be gso_skb->truesize + * So we account mss bytes of 'true size' for each segment. + * The last segment will contain the remaining. + */ + skb->truesize = mss; + gso_skb->truesize -= mss; + } skb = skb->next; th = tcp_hdr(skb); @@ -2962,13 +2985,14 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, * is freed at TX completion, and not right now when gso_skb * is freed by GSO engine */ - if (gso_skb->destructor == tcp_wfree) { + if (copy_destructor) { swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); swap(gso_skb->truesize, skb->truesize); } - delta = htonl(oldlen + (skb->tail - skb->transport_header) + + delta = htonl(oldlen + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); @@ -3213,8 +3237,11 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; - struct page *page = skb_frag_page(f); - sg_set_page(&sg, page, skb_frag_size(f), f->page_offset); + unsigned int offset = f->page_offset; + struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT); + + sg_set_page(&sg, page, skb_frag_size(f), + offset_in_page(offset)); if (crypto_hash_update(desc, &sg, skb_frag_size(f))) return 1; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d7d369428ae4..907311c9a012 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2243,10 +2243,22 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) +static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { struct tcp_sock *tp = tcp_sk(sk); + if (unmark_loss) { + struct sk_buff *skb; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; + } + tp->lost_out = 0; + tcp_clear_all_retrans_hints(tp); + } + if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2255,7 +2267,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); - if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) { + if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } @@ -2263,6 +2275,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tp->snd_cwnd_stamp = tcp_time_stamp; + tp->undo_marker = 0; } static inline bool tcp_may_undo(const struct tcp_sock *tp) @@ -2282,14 +2295,13 @@ static bool tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(sk, true); + tcp_undo_cwnd_reduction(sk, false); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS_BH(sock_net(sk), mib_idx); - tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2303,16 +2315,17 @@ static bool tcp_try_undo_recovery(struct sock *sk) } /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ -static void tcp_try_undo_dsack(struct sock *sk) +static bool tcp_try_undo_dsack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwr(sk, true); - tp->undo_marker = 0; + tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); + return true; } + return false; } /* We can clear retrans_stamp when there are no retransmissions in the @@ -2344,60 +2357,20 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } -/* Undo during fast recovery after partial ACK. */ - -static int tcp_try_undo_partial(struct sock *sk, int acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); - - if (tcp_may_undo(tp)) { - /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. - */ - if (!tcp_any_retrans_done(sk)) - tp->retrans_stamp = 0; - - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); - - DBGUNDO(sk, "Hoe"); - tcp_undo_cwr(sk, false); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); - - /* So... Do not make Hoe's retransmit yet. - * If the first packet was delayed, the rest - * ones are most probably delayed as well. - */ - failed = 0; - } - return failed; -} - /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); if (frto_undo || tcp_may_undo(tp)) { - struct sk_buff *skb; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - } - - tcp_clear_all_retrans_hints(tp); + tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); - tp->lost_out = 0; - tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - tp->undo_marker = 0; if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; @@ -2430,12 +2403,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) TCP_ECN_queue_cwr(tp); } -static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, +static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + int newly_acked_sacked = prior_unsacked - + (tp->packets_out - tp->sacked_out); tp->prr_delivered += newly_acked_sacked; if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { @@ -2492,7 +2467,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) +static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2509,7 +2484,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) tcp_moderate_cwnd(tp); } else { - tcp_cwnd_reduction(sk, newly_acked_sacked, 0); + tcp_cwnd_reduction(sk, prior_unsacked, 0); } } @@ -2667,6 +2642,40 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) tcp_xmit_retransmit_queue(sk); } +/* Undo during fast recovery after partial ACK. */ +static bool tcp_try_undo_partial(struct sock *sk, const int acked, + const int prior_unsacked) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->undo_marker && tcp_packet_delayed(tp)) { + /* Plain luck! Hole if filled with delayed + * packet, rather than with a retransmit. + */ + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + + /* We are getting evidence that the reordering degree is higher + * than we realized. If there are no retransmits out then we + * can undo. Otherwise we clock out new packets but do not + * mark more packets lost or retransmit more. + */ + if (tp->retrans_out) { + tcp_cwnd_reduction(sk, prior_unsacked, 0); + return true; + } + + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; + + DBGUNDO(sk, "partial recovery"); + tcp_undo_cwnd_reduction(sk, true); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); + tcp_try_keep_open(sk); + return true; + } + return false; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2678,15 +2687,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int prior_sacked, bool is_dupack, - int flag) +static void tcp_fastretrans_alert(struct sock *sk, const int acked, + const int prior_unsacked, + bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && + bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2738,9 +2746,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); - } else - do_lost = tcp_try_undo_partial(sk, pkts_acked); - newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; + } else { + if (tcp_try_undo_partial(sk, acked, prior_unsacked)) + return; + /* Partial ACK arrived. Force fast retransmit. */ + do_lost = tcp_is_reno(tp) || + tcp_fackets_out(tp) > tp->reordering; + } + if (tcp_try_undo_dsack(sk)) { + tcp_try_keep_open(sk); + return; + } break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); @@ -2754,13 +2770,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } - newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag, newly_acked_sacked); + tcp_try_to_open(sk, flag, prior_unsacked); return; } @@ -2782,7 +2797,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); + tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); } @@ -3265,9 +3280,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) bool is_dupack = false; u32 prior_in_flight; u32 prior_fackets; - int prior_packets; - int prior_sacked = tp->sacked_out; - int pkts_acked = 0; + int prior_packets = tp->packets_out; + const int prior_unsacked = tp->packets_out - tp->sacked_out; + int acked = 0; /* Number of packets newly acked */ /* If the ack is older than previous acks * then we can probably ignore it. @@ -3338,21 +3353,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) sk->sk_err_soft = 0; icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_time_stamp; - prior_packets = tp->packets_out; if (!prior_packets) goto no_queue; /* See if we can take anything off of the retransmit queue. */ + acked = tp->packets_out; flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); - - pkts_acked = prior_packets - tp->packets_out; + acked -= tp->packets_out; if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, + tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); } else { if (flag & FLAG_DATA_ACKED) @@ -3375,7 +3389,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, + tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than @@ -3398,7 +3412,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, + tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); } @@ -5533,6 +5547,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *req; int queued = 0; + bool acceptable; tp->rx_opt.saw_tstamp = 0; @@ -5603,157 +5618,147 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; /* step 5: check the ACK field */ - if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; - - switch (sk->sk_state) { - case TCP_SYN_RECV: - if (acceptable) { - /* Once we leave TCP_SYN_RECV, we no longer - * need req so release it. - */ - if (req) { - tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->num_retrans; - - reqsk_fastopen_remove(sk, req, false); - } else { - /* Make sure socket is routed, for - * correct metrics. - */ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_congestion_control(sk); + acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; - tcp_mtup_init(sk); - tcp_init_buffer_space(sk); - tp->copied_seq = tp->rcv_nxt; - } - smp_mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); - - /* Note, that this wakeup is only for marginal - * crossed SYN case. Passively open sockets - * are not waked up, because sk->sk_sleep == - * NULL and sk->sk_socket == NULL. - */ - if (sk->sk_socket) - sk_wake_async(sk, - SOCK_WAKE_IO, POLL_OUT); - - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << - tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - - if (req) { - /* Re-arm the timer because data may - * have been sent out. This is similar - * to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more - * aggressive and retranmitting any data - * sooner based on when they were sent - * out. - */ - tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); + switch (sk->sk_state) { + case TCP_SYN_RECV: + if (!acceptable) + return 1; - /* Prevent spurious tcp_cwnd_restart() on - * first data packet. - */ - tp->lsndtime = tcp_time_stamp; + /* Once we leave TCP_SYN_RECV, we no longer need req + * so release it. + */ + if (req) { + tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->num_retrans; - tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); - } else { - return 1; - } - break; + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for correct metrics. */ + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); - case TCP_FIN_WAIT1: - /* If we enter the TCP_FIN_WAIT1 state and we are a - * Fast Open socket and this is the first acceptable - * ACK we have received, this would have acknowledged - * our SYNACK so stop the SYNACK timer. + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); + + /* Note, that this wakeup is only for marginal crossed SYN case. + * Passively open sockets are not waked up, because + * sk->sk_sleep == NULL and sk->sk_socket == NULL. + */ + if (sk->sk_socket) + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + if (req) { + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. */ - if (req != NULL) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) - return 1; - /* We no longer need the request sock. */ - reqsk_fastopen_remove(sk, req, false); - tcp_rearm_rto(sk); - } - if (tp->snd_una == tp->write_seq) { - struct dst_entry *dst; - - tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; - - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); - - if (!sock_flag(sk, SOCK_DEAD)) - /* Wake up lingering close() */ - sk->sk_state_change(sk); - else { - int tmo; - - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { - tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; - } + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); - tmo = tcp_fin_time(sk); - if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sock_owned_by_user(sk)) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. We still can lose it now, - * if it spins in bh_lock_sock(), but it is really - * marginal case. - */ - inet_csk_reset_keepalive_timer(sk, tmo); - } else { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } - } - } - break; + /* Prevent spurious tcp_cwnd_restart() on first data packet */ + tp->lsndtime = tcp_time_stamp; - case TCP_CLOSING: - if (tp->snd_una == tp->write_seq) { - tcp_time_wait(sk, TCP_TIME_WAIT, 0); - goto discard; - } + tcp_initialize_rcv_mss(sk); + tcp_fast_path_on(tp); + break; + + case TCP_FIN_WAIT1: { + struct dst_entry *dst; + int tmo; + + /* If we enter the TCP_FIN_WAIT1 state and we are a + * Fast Open socket and this is the first acceptable + * ACK we have received, this would have acknowledged + * our SYNACK so stop the SYNACK timer. + */ + if (req != NULL) { + /* Return RST if ack_seq is invalid. + * Note that RFC793 only says to generate a + * DUPACK for it but for TCP Fast Open it seems + * better to treat this case like TCP_SYN_RECV + * above. + */ + if (!acceptable) + return 1; + /* We no longer need the request sock. */ + reqsk_fastopen_remove(sk, req, false); + tcp_rearm_rto(sk); + } + if (tp->snd_una != tp->write_seq) break; - case TCP_LAST_ACK: - if (tp->snd_una == tp->write_seq) { - tcp_update_metrics(sk); - tcp_done(sk); - goto discard; - } + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; + + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); + + if (!sock_flag(sk, SOCK_DEAD)) { + /* Wake up lingering close() */ + sk->sk_state_change(sk); break; } + + if (tp->linger2 < 0 || + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + tcp_done(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + return 1; + } + + tmo = tcp_fin_time(sk); + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + } else if (th->fin || sock_owned_by_user(sk)) { + /* Bad case. We could lose such FIN otherwise. + * It is not a big problem, but it looks confusing + * and not so rare event. We still can lose it now, + * if it spins in bh_lock_sock(), but it is really + * marginal case. + */ + inet_csk_reset_keepalive_timer(sk, tmo); + } else { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto discard; + } + break; + } + + case TCP_CLOSING: + if (tp->snd_una == tp->write_seq) { + tcp_time_wait(sk, TCP_TIME_WAIT, 0); + goto discard; + } + break; + + case TCP_LAST_ACK: + if (tp->snd_una == tp->write_seq) { + tcp_update_metrics(sk); + tcp_done(sk); + goto discard; + } + break; } /* step 6: check the URG bit */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 536d40929ba6..ec335fabd5cc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -874,11 +874,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, &md5); tcp_header_size = tcp_options_size + sizeof(struct tcphdr); - if (tcp_packets_in_flight(tp) == 0) { + if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - skb->ooo_okay = 1; - } else - skb->ooo_okay = 0; + + /* if no packet is in qdisc/device queue, then allow XPS to select + * another queue. + */ + skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03c..c7338ec79cc0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1967,6 +1967,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; + sock_rps_record_flow(sk); + /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) @@ -2381,7 +2383,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || + SKB_GSO_GRE | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 05a5df2febc9..06347dbd32c1 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -63,7 +63,7 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 2, + .priority = 3, }; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 9af088d2cdaa..470a9c008e9b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 432e084b6b62..21010fddb203 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1486,7 +1486,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev) } int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev, int strict) + const struct net_device *dev, int strict) { struct inet6_ifaddr *ifp; unsigned int hash = inet6_addr_hash(addr); @@ -2660,8 +2660,10 @@ static void init_loopback(struct net_device *dev) sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); /* Failure cases are ignored */ - if (!IS_ERR(sp_rt)) + if (!IS_ERR(sp_rt)) { + sp_ifa->rt = sp_rt; ip6_ins_rt(sp_rt); + } } read_unlock_bh(&idev->lock); } @@ -2826,9 +2828,9 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) } static int addrconf_notify(struct notifier_block *this, unsigned long event, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *) data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; @@ -4645,13 +4647,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, static void dev_disable_change(struct inet6_dev *idev) { + struct netdev_notifier_info info; + if (!idev || !idev->dev) return; + netdev_notifier_info_init(&info, idev->dev); if (idev->cnf.disable_ipv6) - addrconf_notify(NULL, NETDEV_DOWN, idev->dev); + addrconf_notify(NULL, NETDEV_DOWN, &info); else - addrconf_notify(NULL, NETDEV_UP, idev->dev); + addrconf_notify(NULL, NETDEV_UP, &info); } static void addrconf_disable_change(struct net *net, __s32 newf) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ab5c7ad482cd..a5ac969aeefe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> +#include <net/ping.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> @@ -840,6 +841,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_udplite_proto; + err = proto_register(&pingv6_prot, 1); + if (err) + goto out_unregister_ping_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. @@ -930,6 +934,10 @@ static int __init inet6_init(void) if (err) goto ipv6_packet_fail; + err = pingv6_init(); + if (err) + goto pingv6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -942,6 +950,8 @@ out: sysctl_fail: ipv6_packet_cleanup(); #endif +pingv6_fail: + pingv6_exit(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -985,6 +995,8 @@ register_pernet_fail: rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); +out_unregister_ping_proto: + proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4b56cbbc7890..197e6f4a2b74 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -879,3 +879,30 @@ exit_f: return err; } EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); + +void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, + __u16 srcp, __u16 destp, int bucket) +{ + struct ipv6_pinfo *np = inet6_sk(sp); + const struct in6_addr *dest, *src; + + dest = &np->daddr; + src = &np->rcv_saddr; + seq_printf(seq, + "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", + bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp, + sp->sk_state, + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, + sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops)); +} diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index c5e83fae4df4..140748debc4a 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL(ipv6_skip_exthdr); int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) { const unsigned char *nh = skb_network_header(skb); - int packet_len = skb->tail - skb->network_header; + int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); struct ipv6_opt_hdr *hdr; int len; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4ff0a42b8c7..4b4890bbe16d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -57,6 +57,7 @@ #include <net/ipv6.h> #include <net/ip6_checksum.h> +#include <net/ping.h> #include <net/protocol.h> #include <net/raw.h> #include <net/rawv6.h> @@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net) static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ + struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, 0, 0); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); + + if (!(type & ICMPV6_INFOMSG_MASK)) + if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) + ping_err(skb, offset, info); } static int icmpv6_rcv(struct sk_buff *skb); @@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -391,7 +399,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int err = 0; if ((u8 *)hdr < skb->head || - (skb->network_header + sizeof(*hdr)) > skb->tail) + (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; /* @@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto csum_error; } @@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - /* we couldn't care less */ + ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 71b766ee821d..a263b990ee11 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d2eedf192330..dae1949019d7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1147,7 +1147,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (WARN_ON(np->cork.opt)) return -EINVAL; - np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); + np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); if (unlikely(np->cork.opt == NULL)) return -ENOBUFS; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fcf..583e8d435f9a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1319,7 +1319,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr6_table *mrt; struct mif_device *v; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bfa6cc36ef2a..72c8bfe06bb4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1409,8 +1409,9 @@ static void mld_sendpack(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); - mldlen = skb->tail - skb->transport_header; + payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - + sizeof(*pip6); + mldlen = skb_tail_pointer(skb) - skb_transport_header(skb); pip6->payload_len = htons(payload_len); pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 0f9bdc5ee9f3..9ac01dc9402e 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -268,7 +268,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; @@ -404,7 +405,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2712ab22a174..781dd3c99680 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -693,7 +693,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -853,7 +853,7 @@ static void ndisc_recv_na(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -1069,7 +1069,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) __u8 * opt = (__u8 *)(ra_msg + 1); - optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); + optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - + sizeof(struct ra_msg); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); @@ -1346,7 +1347,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) u8 *hdr; struct ndisc_options ndopts; struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); #ifdef CONFIG_IPV6_NDISC_NODETYPE @@ -1568,7 +1569,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct inet6_dev *idev; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 72836f40b730..95f3f1da0d7f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -10,6 +10,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/export.h> +#include <net/addrconf.h> #include <net/dst.h> #include <net/ipv6.h> #include <net/ip6_route.h> @@ -186,6 +187,10 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, return csum; }; +static const struct nf_ipv6_ops ipv6ops = { + .chk_addr = ipv6_chk_addr, +}; + static const struct nf_afinfo nf_ip6_afinfo = { .family = AF_INET6, .checksum = nf_ip6_checksum, @@ -198,6 +203,7 @@ static const struct nf_afinfo nf_ip6_afinfo = { int __init ipv6_netfilter_init(void) { + RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); return nf_register_afinfo(&nf_ip6_afinfo); } @@ -206,5 +212,6 @@ int __init ipv6_netfilter_init(void) */ void ipv6_netfilter_fini(void) { + RCU_INIT_POINTER(nf_ipv6_ops, NULL); nf_unregister_afinfo(&nf_ip6_afinfo); } diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 60e9053bab05..47bff6107519 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -71,7 +71,7 @@ static int device_cmp(struct nf_conn *ct, void *ifindex) static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) @@ -89,8 +89,10 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; + struct netdev_notifier_info info; - return masq_device_event(this, event, ifa->idev->dev); + netdev_notifier_info_init(&info, ifa->idev->dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_inet_notifier = { diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index c2e73e647e44..ab92a3673fbb 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -40,7 +40,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) u16 offset = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c new file mode 100644 index 000000000000..a43110385918 --- /dev/null +++ b/net/ipv6/ping.c @@ -0,0 +1,272 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * "Ping" sockets + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ipv4/ping.c code. + * + * Authors: Lorenzo Colitti (IPv6 support) + * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6), + * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32) + * + */ + +#include <net/addrconf.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/protocol.h> +#include <net/udp.h> +#include <net/transp_v6.h> +#include <net/ping.h> + +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + + +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + return -EAFNOSUPPORT; +} +int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} +int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +{ + return -EAFNOSUPPORT; +} +void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) +{ + return 0; +} + +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct icmp6hdr user_icmph; + int addr_type; + struct in6_addr *daddr; + int iif = 0; + struct flowi6 fl6; + int err; + int hlimit; + struct dst_entry *dst; + struct rt6_info *rt; + struct pingfakehdr pfh; + + pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + + if (msg->msg_name) { + struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + if (msg->msg_namelen < sizeof(struct sockaddr_in6) || + u->sin6_family != AF_INET6) { + return -EINVAL; + } + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != u->sin6_scope_id) { + return -EINVAL; + } + daddr = &(u->sin6_addr); + iif = u->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } + + if (!iif) + iif = sk->sk_bound_dev_if; + + addr_type = ipv6_addr_type(daddr); + if (__ipv6_addr_needs_scope_id(addr_type) && !iif) + return -EINVAL; + if (addr_type & IPV6_ADDR_MAPPED) + return -EINVAL; + + /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + + memset(&fl6, 0, sizeof(fl6)); + + fl6.flowi6_proto = IPPROTO_ICMPV6; + fl6.saddr = np->saddr; + fl6.daddr = *daddr; + fl6.fl6_icmp_type = user_icmph.icmp6_type; + fl6.fl6_icmp_code = user_icmph.icmp6_code; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); + if (!np) + return -EBADF; + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + pfh.icmph.type = user_icmph.icmp6_type; + pfh.icmph.code = user_icmph.icmp6_code; + pfh.icmph.checksum = 0; + pfh.icmph.un.echo.id = inet->inet_sport; + pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; + pfh.iov = msg->msg_iov; + pfh.wcheck = 0; + pfh.family = AF_INET6; + + if (ipv6_addr_is_multicast(&fl6.daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + + err = ip6_append_data(sk, ping_getfrag, &pfh, len, + 0, hlimit, + np->tclass, NULL, &fl6, rt, + MSG_DONTWAIT, np->dontfrag); + + if (err) { + ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *) &pfh.icmph, + len); + } + + return err; +} + +#ifdef CONFIG_PROC_FS +static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET6); +} + +int ping_v6_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct ping_iter_state *) seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } + return 0; +} + +static struct ping_seq_afinfo ping_v6_seq_afinfo = { + .name = "icmp6", + .family = AF_INET6, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v6_seq_start, + .show = ping_v6_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; + +static int __net_init ping_v6_proc_init_net(struct net *net) +{ + return ping_proc_register(net, &ping_v6_seq_afinfo); +} + +static void __net_init ping_v6_proc_exit_net(struct net *net) +{ + return ping_proc_unregister(net, &ping_v6_seq_afinfo); +} + +static struct pernet_operations ping_v6_net_ops = { + .init = ping_v6_proc_init_net, + .exit = ping_v6_proc_exit_net, +}; +#endif + +int __init pingv6_init(void) +{ +#ifdef CONFIG_PROC_FS + int ret = register_pernet_subsys(&ping_v6_net_ops); + if (ret) + return ret; +#endif + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. + */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; +#ifdef CONFIG_PROC_FS + unregister_pernet_subsys(&ping_v6_net_ops); +#endif + inet6_unregister_protosw(&pingv6_protosw); +} diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index f3c1ff4357ff..51c3285b5d9b 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -90,7 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), - SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), + /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */ SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index eedff8ccded5..c45f7a5c36e9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1132,7 +1132,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) - amount = skb->tail - skb->transport_header; + amount = skb_tail_pointer(skb) - + skb_transport_header(skb); spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } @@ -1226,45 +1227,16 @@ struct proto rawv6_prot = { }; #ifdef CONFIG_PROC_FS -static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) -{ - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = 0; - srcp = inet_sk(sp)->inet_num; - seq_printf(seq, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - i, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); -} - static int raw6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + struct sock *sp = v; + __u16 srcp = inet_sk(sp)->inet_num; + ip6_dgram_sock_seq_show(seq, v, srcp, 0, + raw_seq_private(seq)->bucket); + } return 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad0aa6b0b86a..2b874185ebb2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1649,7 +1649,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu int optlen, on_link; u8 *lladdr; - optlen = skb->tail - skb->transport_header; + optlen = skb_tail_pointer(skb) - skb_transport_header(skb); optlen -= sizeof(*msg); if (optlen < 0) { @@ -2681,9 +2681,9 @@ errout: } static int ip6_route_dev_notify(struct notifier_block *this, - unsigned long event, void *data) + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 335363478bbf..6b9c1f128eaf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -577,6 +577,10 @@ static int ipip6_rcv(struct sk_buff *skb) if (tunnel != NULL) { struct pcpu_tstats *tstats; + if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && + tunnel->parms.iph.protocol != 0) + goto out; + secpath_reset(skb); skb->mac_header = skb->network_header; skb_reset_network_header(skb); @@ -629,6 +633,35 @@ out: return 0; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. */ + .proto = htons(ETH_P_IP), +}; + +static int ipip_rcv(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct ip_tunnel *tunnel; + + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, + iph->saddr, iph->daddr); + if (tunnel != NULL) { + if (tunnel->parms.iph.protocol != IPPROTO_IPIP && + tunnel->parms.iph.protocol != 0) + goto drop; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + } + + return 1; + +drop: + kfree_skb(skb); + return 0; +} + /* * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function * stores the embedded IPv4 address in v4dst and returns true. @@ -877,6 +910,43 @@ tx_error: return NETDEV_TX_OK; } +static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tiph = &tunnel->parms.iph; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); + return NETDEV_TX_OK; +} + +static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + ipip_tunnel_xmit(skb, dev); + break; + case htons(ETH_P_IPV6): + ipip6_tunnel_xmit(skb, dev); + break; + default: + goto tx_err; + } + + return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + +} + static void ipip6_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; @@ -1027,7 +1097,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || + if (p.iph.protocol != IPPROTO_IPV6 && + p.iph.protocol != IPPROTO_IPIP && + p.iph.protocol != 0) + goto done; + if (p.iph.version != 4 || p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) goto done; if (p.iph.ttl) @@ -1164,7 +1238,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, - .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, @@ -1232,6 +1306,22 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + u8 proto; + + if (!data) + return 0; + + proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (proto != IPPROTO_IPV6 && + proto != IPPROTO_IPIP && + proto != 0) + return -EINVAL; + + return 0; +} + static void ipip6_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms) { @@ -1268,6 +1358,10 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_FLAGS]) parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + } #ifdef CONFIG_IPV6_SIT_6RD @@ -1391,6 +1485,8 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + #ifdef CONFIG_IPV6_SIT_6RD /* IFLA_IPTUN_6RD_PREFIX */ nla_total_size(sizeof(struct in6_addr)) + @@ -1416,6 +1512,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; @@ -1445,6 +1542,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, #ifdef CONFIG_IPV6_SIT_6RD [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, @@ -1459,6 +1557,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipip6_tunnel_setup, + .validate = ipip6_validate, .newlink = ipip6_newlink, .changelink = ipip6_changelink, .get_size = ipip6_get_size, @@ -1471,6 +1570,12 @@ static struct xfrm_tunnel sit_handler __read_mostly = { .priority = 1, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = ipip_rcv, + .err_handler = ipip6_err, + .priority = 2, +}; + static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { int prio; @@ -1553,6 +1658,7 @@ static void __exit sit_cleanup(void) { rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ @@ -1569,9 +1675,14 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - pr_info("%s: can't add protocol\n", __func__); + pr_info("%s: can't register ip6ip4\n", __func__); goto xfrm_tunnel_failed; } + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: can't register ip4ip4\n", __func__); + goto xfrm_tunnel4_failed; + } err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1580,6 +1691,8 @@ out: return err; rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm_tunnel_failed: unregister_pernet_device(&sit_net_ops); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa6..b5808539cd5c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1359,48 +1359,17 @@ static const struct inet6_protocol udpv6_protocol = { /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS - -static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = ntohs(inet->inet_dport); - srcp = ntohs(inet->inet_sport); - seq_printf(seq, - "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - bucket, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops)); -} - int udp6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct udp_iter_state *)seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } return 0; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 3bb3a891a424..5d1b8d7ac993 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -46,11 +46,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, unsigned int mss; unsigned int unfrag_ip6hlen, unfrag_len; struct frag_hdr *fptr; - u8 *mac_start, *prevhdr; + u8 *packet_start, *prevhdr; u8 nexthdr; u8 frag_hdr_sz = sizeof(struct frag_hdr); int offset; __wsum csum; + int tnl_hlen; mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -63,7 +64,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || + SKB_GSO_GRE | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; @@ -83,9 +85,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; /* Check if there is enough headroom to insert fragment header. */ - if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && - pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) - goto out; + tnl_hlen = skb_tnl_header_len(skb); + if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) + goto out; + } /* Find the unfragmentable header and shift it left by frag_hdr_sz * bytes to insert fragment header. @@ -93,11 +97,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + - unfrag_ip6hlen; - mac_start = skb_mac_header(skb); - memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + + unfrag_ip6hlen + tnl_hlen; + packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; + memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); + SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; skb->mac_header -= frag_hdr_sz; skb->network_header -= frag_hdr_sz; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f547a47d381c..7a1e0fc1bd4d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -330,7 +330,7 @@ static __inline__ void __ipxitf_put(struct ipx_interface *intrfc) static int ipxitf_device_event(struct notifier_block *notifier, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ipx_interface *i, *tmp; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 8c004161a843..9ea0c933b9ff 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -544,7 +544,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self, /* * We now have some discovery info to deliver! */ - discovery = kmalloc(sizeof(discovery_t), GFP_ATOMIC); + discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC); if (!discovery) { IRDA_WARNING("%s: unable to malloc!\n", __func__); return; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ae691651b721..168aff5e60de 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2293,7 +2293,7 @@ out_unlock: static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *event_dev = (struct net_device *)ptr; + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); struct sock *sk; struct iucv_sock *iucv; diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b1e5af25713..c5fbd7589681 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2366,6 +2366,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa out: xfrm_pol_put(xp); + if (err == 0) + xfrm_garbage_collect(net); return err; } @@ -2615,6 +2617,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_pol_put(xp); + if (delete && err == 0) + xfrm_garbage_collect(net); return err; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 158e6eb188d3..44be28cfc6c4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1267,6 +1267,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, __le16 fc, bool acked); +void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 60f1ce5e5e52..7c3ba8628d4e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -159,9 +159,10 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr) +static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *iter; u64 new, mask, tmp; u8 *m; int ret = 0; @@ -181,11 +182,14 @@ static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr) mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + list_for_each_entry(iter, &local->interfaces, list) { + if (iter == sdata) + continue; + + if (iter->vif.type == NL80211_IFTYPE_MONITOR) continue; - m = sdata->vif.addr; + m = iter->vif.addr; tmp = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); @@ -209,7 +213,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) if (ieee80211_sdata_running(sdata)) return -EBUSY; - ret = ieee80211_verify_mac(sdata->local, sa->sa_data); + ret = ieee80211_verify_mac(sdata, sa->sa_data); if (ret) return ret; @@ -474,6 +478,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) master->control_port_protocol; sdata->control_port_no_encrypt = master->control_port_no_encrypt; + sdata->vif.cab_queue = master->vif.cab_queue; + memcpy(sdata->vif.hw_queue, master->vif.hw_queue, + sizeof(sdata->vif.hw_queue)); break; } case NL80211_IFTYPE_AP: @@ -653,7 +660,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local, -1); - if (dev) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + /* XXX: for AP_VLAN, actually track AP queues */ + netif_tx_start_all_queues(dev); + } else if (dev) { unsigned long flags; int n_acs = IEEE80211_NUM_ACS; int ac; @@ -1479,7 +1490,17 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, break; } + /* + * Pick address of existing interface in case user changed + * MAC address manually, default to perm_addr. + */ m = local->hw.wiphy->perm_addr; + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + continue; + m = sdata->vif.addr; + break; + } start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); @@ -1696,6 +1717,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); + /* + * Close all AP_VLAN interfaces first, as otherwise they + * might be closed while the AP interface they belong to + * is closed, causing unregister_netdevice_many() to crash. + */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + dev_close(sdata->dev); + mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); @@ -1717,10 +1747,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } static int netdev_notify(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ieee80211_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 29620bfc7a69..a8c2130c8ba4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1015,7 +1015,8 @@ static void ieee80211_chswitch_timer(unsigned long data) static void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - u64 timestamp, struct ieee802_11_elems *elems) + u64 timestamp, struct ieee802_11_elems *elems, + bool beacon) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1032,6 +1033,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; ASSERT_MGD_MTX(ifmgd); @@ -1048,11 +1050,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + ht_oper = elems->ht_operation; if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_40MHZ)) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; + /* only used for bandwidth here */ + ht_oper = NULL; } if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) @@ -1094,10 +1099,20 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } - if (sec_chan_offs) { + if (!beacon && sec_chan_offs) { secondary_channel_offset = sec_chan_offs->sec_chan_offs; + } else if (beacon && ht_oper) { + secondary_channel_offset = + ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; } else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { - /* if HT is enabled and the IE not present, it's still HT */ + /* + * If it's not a beacon, HT is enabled and the IE not present, + * it's 20 MHz, 802.11-2012 8.5.2.6: + * This element [the Secondary Channel Offset Element] is + * present when switching to a 40 MHz channel. It may be + * present when switching to a 20 MHz channel (in which + * case the secondary channel offset is set to SCN). + */ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; } @@ -2796,7 +2811,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->iflist_mtx); } - ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems); + ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, + elems, true); } @@ -3210,7 +3226,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, - &elems); + &elems, false); } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { ies_len = skb->len - offsetof(struct ieee80211_mgmt, @@ -3232,7 +3248,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, - &elems); + &elems, false); } break; } @@ -3305,10 +3321,6 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (WARN_ON_ONCE(!auth_data)) return -EINVAL; - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | - IEEE80211_TX_INTFL_MLME_CONN_TX; - auth_data->tries++; if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) { @@ -3342,6 +3354,10 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) auth_data->expected_transaction = trans; } + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; + ieee80211_send_auth(sdata, trans, auth_data->algorithm, status, auth_data->data, auth_data->data_len, auth_data->bss->bssid, @@ -3365,12 +3381,12 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) * will not answer to direct packet in unassociated state. */ ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1], - NULL, 0, (u32) -1, true, tx_flags, + NULL, 0, (u32) -1, true, 0, auth_data->bss->channel, false); rcu_read_unlock(); } - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; ifmgd->auth_data->timeout_started = true; run_again(ifmgd, auth_data->timeout); @@ -3623,6 +3639,31 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) } } +#ifdef CONFIG_PM +void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + mutex_lock(&ifmgd->mtx); + if (!ifmgd->associated) { + mutex_unlock(&ifmgd->mtx); + return; + } + + if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { + sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; + mlme_dbg(sdata, "driver requested disconnect after resume\n"); + ieee80211_sta_connection_lost(sdata, + ifmgd->associated->bssid, + WLAN_REASON_UNSPECIFIED, + true); + mutex_unlock(&ifmgd->mtx); + return; + } + mutex_unlock(&ifmgd->mtx); +} +#endif + /* interface setup */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) { @@ -4329,7 +4370,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; bool tx = !req->local_state_change; - bool sent_frame = false; + bool report_frame = false; mutex_lock(&ifmgd->mtx); @@ -4346,7 +4387,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_destroy_auth_data(sdata, false); mutex_unlock(&ifmgd->mtx); - sent_frame = tx; + report_frame = true; goto out; } @@ -4354,12 +4395,12 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); - sent_frame = tx; + report_frame = true; } mutex_unlock(&ifmgd->mtx); out: - if (sent_frame) + if (report_frame) __cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 0d51877efdb7..d3f414fe67e0 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -688,8 +688,15 @@ int rate_control_set_rates(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, struct ieee80211_sta_rates *rates) { - struct ieee80211_sta_rates *old = rcu_dereference(pubsta->rates); + struct ieee80211_sta_rates *old; + /* + * mac80211 guarantees that this function will not be called + * concurrently, so the following RCU access is safe, even without + * extra locking. This can not be checked easily, so we just set + * the condition to true. + */ + old = rcu_dereference_protected(pubsta->rates, true); rcu_assign_pointer(pubsta->rates, rates); if (old) kfree_rcu(old, rcu_head); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c8447af76ead..8e2952620256 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3036,6 +3036,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, * and location updates. Note that mac80211 * itself never looks at these frames. */ + if (!multicast && + !ether_addr_equal(sdata->vif.addr, hdr->addr1)) + return 0; if (ieee80211_is_public_action(hdr, skb->len)) return 1; if (!ieee80211_is_beacon(hdr->frame_control)) diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 3ed801d90f1e..124b1fdc20d0 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -208,10 +208,10 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf, u32 iv32 = get_unaligned_le32(&data[4]); u16 iv16 = data[2] | (data[0] << 8); - spin_lock_bh(&key->u.tkip.txlock); + spin_lock(&key->u.tkip.txlock); ieee80211_compute_tkip_p1k(key, iv32); tkip_mixing_phase2(tk, ctx, iv16, p2k); - spin_unlock_bh(&key->u.tkip.txlock); + spin_unlock(&key->u.tkip.txlock); } EXPORT_SYMBOL(ieee80211_get_tkip_p2k); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3f87fa468b1f..27e07150eb46 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1740,6 +1740,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) mb(); local->resuming = false; + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_sta_restart(sdata); + } + mod_timer(&local->sta_cleanup, jiffies + 1); #else WARN_ON(1); diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig new file mode 100644 index 000000000000..37421db88965 --- /dev/null +++ b/net/mpls/Kconfig @@ -0,0 +1,9 @@ +# +# MPLS configuration +# +config NET_MPLS_GSO + tristate "MPLS: GSO support" + help + This is helper module to allow segmentation of non-MPLS GSO packets + that have had MPLS stack entries pushed onto them and thus + become MPLS GSO packets. diff --git a/net/mpls/Makefile b/net/mpls/Makefile new file mode 100644 index 000000000000..0a3c171be537 --- /dev/null +++ b/net/mpls/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for MPLS. +# +obj-y += mpls_gso.o diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c new file mode 100644 index 000000000000..1bec1219ab81 --- /dev/null +++ b/net/mpls/mpls_gso.c @@ -0,0 +1,108 @@ +/* + * MPLS GSO Support + * + * Authors: Simon Horman (horms@verge.net.au) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on: GSO portions of net/ipv4/gre.c + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/netdev_features.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t mpls_features; + __be16 mpls_protocol; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE | + SKB_GSO_MPLS))) + goto out; + + /* Setup inner SKB. */ + mpls_protocol = skb->protocol; + skb->protocol = skb->inner_protocol; + + /* Push back the mac header that skb_mac_gso_segment() has pulled. + * It will be re-pulled by the call to skb_mac_gso_segment() below + */ + __skb_push(skb, skb->mac_len); + + /* Segment inner packet. */ + mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, mpls_features); + + + /* Restore outer protocol. */ + skb->protocol = mpls_protocol; + + /* Re-pull the mac header that the call to skb_mac_gso_segment() + * above pulled. It will be re-pushed after returning + * skb_mac_gso_segment(), an indirect caller of this function. + */ + __skb_push(skb, skb->data - skb_mac_header(skb)); + +out: + return segs; +} + +static int mpls_gso_send_check(struct sk_buff *skb) +{ + return 0; +} + +static struct packet_offload mpls_mc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_MC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static struct packet_offload mpls_uc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_UC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static int __init mpls_gso_init(void) +{ + pr_info("MPLS GSO support\n"); + + dev_add_offload(&mpls_uc_offload); + dev_add_offload(&mpls_mc_offload); + + return 0; +} + +static void __exit mpls_gso_exit(void) +{ + dev_remove_offload(&mpls_uc_offload); + dev_remove_offload(&mpls_mc_offload); +} + +module_init(mpls_gso_init); +module_exit(mpls_gso_exit); + +MODULE_DESCRIPTION("MPLS GSO support"); +MODULE_AUTHOR("Simon Horman (horms@verge.net.au)"); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 300539db7bb1..2217363ab422 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -30,6 +30,8 @@ static DEFINE_MUTEX(afinfo_mutex); const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); +const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; +EXPORT_SYMBOL_GPL(nf_ipv6_ops); int nf_register_afinfo(const struct nf_afinfo *afinfo) { diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 085b5880ab0d..05565d2b3a61 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1001,6 +1001,32 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) return th->rst; } +static inline bool is_new_conn(const struct sk_buff *skb, + struct ip_vs_iphdr *iph) +{ + switch (iph->protocol) { + case IPPROTO_TCP: { + struct tcphdr _tcph, *th; + + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + if (th == NULL) + return false; + return th->syn; + } + case IPPROTO_SCTP: { + sctp_chunkhdr_t *sch, schunk; + + sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), + sizeof(schunk), &schunk); + if (sch == NULL) + return false; + return sch->type == SCTP_CID_INIT; + } + default: + return false; + } +} + /* Handle response packets: rewrite addresses and send away... */ static unsigned int @@ -1612,6 +1638,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Check if the packet belongs to an existing connection entry */ cp = pp->conn_in_get(af, skb, &iph, 0); + + if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest && + unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs && + is_new_conn(skb, &iph)) { + ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + cp = NULL; + } + if (unlikely(!cp) && !iph.fragoffs) { /* No (second) fragments need to enter here, as nf_defrag_ipv6 * replayed fragment zero will already have created the cp diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 70146496e73a..df05c1c276f0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1487,9 +1487,9 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) * Currently only NETDEV_DOWN is handled to release refs to cached dsts */ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_service *svc; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 0df269d7c99f..a65edfe4b16c 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -67,8 +67,8 @@ struct ip_vs_sh_bucket { #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) struct ip_vs_sh_state { - struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; struct rcu_head rcu_head; + struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; /* diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index bd5474adcabc..4b60a87b7596 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -148,7 +148,7 @@ void nf_log_packet(struct net *net, va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); va_end(args); - logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); + logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix); } rcu_read_unlock(); } @@ -368,14 +368,18 @@ static int __net_init nf_log_net_init(struct net *net) return 0; out_sysctl: +#ifdef CONFIG_PROC_FS remove_proc_entry("nf_log", net->nf.proc_netfilter); +#endif return ret; } static void __net_exit nf_log_net_exit(struct net *net) { netfilter_log_sysctl_exit(net); +#ifdef CONFIG_PROC_FS remove_proc_entry("nf_log", net->nf.proc_netfilter); +#endif } static struct pernet_operations nf_log_net_ops = { diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 5fea563afe30..85e20a919081 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -104,7 +104,7 @@ static void mangle_contents(struct sk_buff *skb, /* move post-replacement */ memmove(data + match_offset + rep_len, data + match_offset + match_len, - skb->tail - (skb->network_header + dataoff + + skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff + match_offset + match_len)); /* insert data from buffer */ diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index faf1e9300d8a..962e9792e317 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -602,7 +602,8 @@ static struct nf_loginfo default_loginfo = { /* log handler for internal netfilter logging api */ void -nfulnl_log_packet(u_int8_t pf, +nfulnl_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -615,7 +616,6 @@ nfulnl_log_packet(u_int8_t pf, const struct nf_loginfo *li; unsigned int qthreshold; unsigned int plen; - struct net *net = dev_net(in ? in : out); struct nfnl_log_net *log = nfnl_log_pernet(net); if (li_user && li_user->type == NF_LOG_TYPE_ULOG) @@ -1045,7 +1045,9 @@ static int __net_init nfnl_log_net_init(struct net *net) static void __net_exit nfnl_log_net_exit(struct net *net) { +#ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); +#endif } static struct pernet_operations nfnl_log_net_ops = { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index eb2cde836b9a..c011543bff5d 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -799,7 +799,7 @@ static int nfqnl_rcv_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) @@ -1284,7 +1284,9 @@ static int __net_init nfnl_queue_net_init(struct net *net) static void __net_exit nfnl_queue_net_exit(struct net *net) { +#ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); +#endif } static struct pernet_operations nfnl_queue_net_ops = { diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index fe573f6c9e91..5ab24843370a 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -466,7 +466,8 @@ log_packet_common(struct sbuff *m, static void -ipt_log_packet(u_int8_t pf, +ipt_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -475,7 +476,6 @@ ipt_log_packet(u_int8_t pf, const char *prefix) { struct sbuff *m; - struct net *net = dev_net(in ? in : out); /* FIXME: Disabled from containers until syslog ns is supported */ if (!net_eq(net, &init_net)) @@ -737,7 +737,7 @@ static void dump_ipv6_packet(struct sbuff *m, dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!recurse && skb->mark) + if (recurse && skb->mark) sb_add(m, "MARK=0x%x ", skb->mark); } @@ -797,7 +797,8 @@ fallback: } static void -ip6t_log_packet(u_int8_t pf, +ip6t_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -806,7 +807,6 @@ ip6t_log_packet(u_int8_t pf, const char *prefix) { struct sbuff *m; - struct net *net = dev_net(in ? in : out); /* FIXME: Disabled from containers until syslog ns is supported */ if (!net_eq(net, &init_net)) @@ -833,17 +833,18 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; if (par->family == NFPROTO_IPV4) - ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, + ipt_log_packet(net, NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (par->family == NFPROTO_IPV6) - ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, + ip6t_log_packet(net, NFPROTO_IPV6, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); #endif else diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index a17dd0f589b2..fb7497c928a0 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -26,13 +26,14 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nfulnl_log_packet(par->family, par->hooknum, skb, par->in, + nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, &li, info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 25fd1c4e1eec..1eb1a44bfd3d 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -30,17 +30,28 @@ static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) static unsigned int tcpoptstrip_mangle_packet(struct sk_buff *skb, - const struct xt_tcpoptstrip_target_info *info, + const struct xt_action_param *par, unsigned int tcphoff, unsigned int minlen) { + const struct xt_tcpoptstrip_target_info *info = par->targinfo; unsigned int optl, i, j; struct tcphdr *tcph; u_int16_t n, o; u_int8_t *opt; + int len; + + /* This is a fragment, no TCP header is available */ + if (par->fragoff != 0) + return XT_CONTINUE; if (!skb_make_writable(skb, skb->len)) return NF_DROP; + len = skb->len - tcphoff; + if (len < (int)sizeof(struct tcphdr) || + tcp_hdr(skb)->doff * 4 > len) + return NF_DROP; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); opt = (u_int8_t *)tcph; @@ -76,7 +87,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, static unsigned int tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb), + return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb), sizeof(struct iphdr) + sizeof(struct tcphdr)); } @@ -94,7 +105,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (tcphoff < 0) return NF_DROP; - return tcpoptstrip_mangle_packet(skb, par->targinfo, tcphoff, + return tcpoptstrip_mangle_packet(skb, par, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); } #endif diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index bd93e51d30ac..292934d23482 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -200,7 +200,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct xt_tee_priv *priv; priv = container_of(this, struct xt_tee_priv, notifier); diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 49c5ff7f6dd6..68ff29f60867 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -22,6 +22,7 @@ #include <net/ip6_fib.h> #endif +#include <linux/netfilter_ipv6.h> #include <linux/netfilter/xt_addrtype.h> #include <linux/netfilter/x_tables.h> @@ -33,12 +34,12 @@ MODULE_ALIAS("ip6t_addrtype"); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, - const struct in6_addr *addr) + const struct in6_addr *addr, u16 mask) { const struct nf_afinfo *afinfo; struct flowi6 flow; struct rt6_info *rt; - u32 ret; + u32 ret = 0; int route_err; memset(&flow, 0, sizeof(flow)); @@ -49,12 +50,19 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, rcu_read_lock(); afinfo = nf_get_afinfo(NFPROTO_IPV6); - if (afinfo != NULL) + if (afinfo != NULL) { + const struct nf_ipv6_ops *v6ops; + + if (dev && (mask & XT_ADDRTYPE_LOCAL)) { + v6ops = nf_get_ipv6_ops(); + if (v6ops && v6ops->chk_addr(net, addr, dev, true)) + ret = XT_ADDRTYPE_LOCAL; + } route_err = afinfo->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), !!dev); - else + flowi6_to_flowi(&flow), false); + } else { route_err = 1; - + } rcu_read_unlock(); if (route_err) @@ -62,15 +70,12 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (rt->rt6i_flags & RTF_REJECT) ret = XT_ADDRTYPE_UNREACHABLE; - else - ret = 0; - if (rt->rt6i_flags & RTF_LOCAL) + if (dev == NULL && rt->rt6i_flags & RTF_LOCAL) ret |= XT_ADDRTYPE_LOCAL; if (rt->rt6i_flags & RTF_ANYCAST) ret |= XT_ADDRTYPE_ANYCAST; - dst_release(&rt->dst); return ret; } @@ -90,7 +95,7 @@ static bool match_type6(struct net *net, const struct net_device *dev, if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | XT_ADDRTYPE_UNREACHABLE) & mask) - return !!(mask & match_lookup_rt6(net, dev, addr)); + return !!(mask & match_lookup_rt6(net, dev, addr, mask)); return true; } diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index d8d424337550..6bb1d42f0fac 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -245,6 +245,71 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, } } +/** + * netlbl_domhsh_validate - Validate a new domain mapping entry + * @entry: the entry to validate + * + * This function validates the new domain mapping entry to ensure that it is + * a valid entry. Returns zero on success, negative values on failure. + * + */ +static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) +{ + struct netlbl_af4list *iter4; + struct netlbl_domaddr4_map *map4; +#if IS_ENABLED(CONFIG_IPV6) + struct netlbl_af6list *iter6; + struct netlbl_domaddr6_map *map6; +#endif /* IPv6 */ + + if (entry == NULL) + return -EINVAL; + + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + if (entry->type_def.cipsov4 != NULL || + entry->type_def.addrsel != NULL) + return -EINVAL; + break; + case NETLBL_NLTYPE_CIPSOV4: + if (entry->type_def.cipsov4 == NULL) + return -EINVAL; + break; + case NETLBL_NLTYPE_ADDRSELECT: + netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) { + map4 = netlbl_domhsh_addr4_entry(iter4); + switch (map4->type) { + case NETLBL_NLTYPE_UNLABELED: + if (map4->type_def.cipsov4 != NULL) + return -EINVAL; + break; + case NETLBL_NLTYPE_CIPSOV4: + if (map4->type_def.cipsov4 == NULL) + return -EINVAL; + break; + default: + return -EINVAL; + } + } +#if IS_ENABLED(CONFIG_IPV6) + netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) { + map6 = netlbl_domhsh_addr6_entry(iter6); + switch (map6->type) { + case NETLBL_NLTYPE_UNLABELED: + break; + default: + return -EINVAL; + } + } +#endif /* IPv6 */ + break; + default: + return -EINVAL; + } + + return 0; +} + /* * Domain Hash Table Functions */ @@ -311,6 +376,10 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_af6list *tmp6; #endif /* IPv6 */ + ret_val = netlbl_domhsh_validate(entry); + if (ret_val != 0) + return ret_val; + /* XXX - we can remove this RCU read lock as the spinlock protects the * entire function, but before we do we need to fixup the * netlbl_af[4,6]list RCU functions to do "the right thing" with diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 8a6c6ea466d8..af3531926ee0 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -708,7 +708,7 @@ unlhsh_remove_return: * netlbl_unlhsh_netdev_handler - Network device notification handler * @this: notifier block * @event: the event - * @ptr: the network device (cast to void) + * @ptr: the netdevice notifier info (cast to void) * * Description: * Handle network device events, although at present all we care about is a @@ -717,10 +717,9 @@ unlhsh_remove_return: * */ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netlbl_unlhsh_iface *iface = NULL; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 12ac6b47a35c..d0b3dd60d386 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -747,7 +747,7 @@ static void netlink_skb_destructor(struct sk_buff *skb) atomic_dec(&ring->pending); sock_put(sk); - skb->data = NULL; + skb->head = NULL; } #endif if (skb->sk != NULL) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec0c80fde69f..698814bfa7ad 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -117,7 +117,7 @@ static void nr_kill_by_device(struct net_device *dev) */ static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/nfc/Makefile b/net/nfc/Makefile index fb799deaed4f..a76f4533cb6c 100644 --- a/net/nfc/Makefile +++ b/net/nfc/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_NFC) += nfc.o obj-$(CONFIG_NFC_NCI) += nci/ obj-$(CONFIG_NFC_HCI) += hci/ -#obj-$(CONFIG_NFC_LLCP) += llcp/ nfc-objs := core.o netlink.o af_nfc.o rawsock.o llcp_core.o llcp_commands.o \ llcp_sock.o diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index ef4feec6cd84..c3235675f359 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -78,7 +78,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct ovs_net *ovs_net; - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vport *vport = NULL; if (!ovs_is_internal_dev(dev)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8ec1bca7f859..79fe63246b27 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3331,10 +3331,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } -static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) +static int packet_notifier(struct notifier_block *this, + unsigned long msg, void *ptr) { struct sock *sk; - struct net_device *dev = data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 45a7df6575de..56a6146ac94b 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -292,9 +292,9 @@ static void phonet_route_autodel(struct net_device *dev) /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (what) { case NETDEV_REGISTER: diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9c8347451597..e98fcfbe6007 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -202,10 +202,10 @@ static void rose_kill_by_device(struct net_device *dev) /* * Handle device status changes. */ -static int rose_device_event(struct notifier_block *this, unsigned long event, - void *ptr) +static int rose_device_event(struct notifier_block *this, + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5d676edc22a6..977c10e0631b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -243,7 +243,7 @@ nla_put_failure: static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; if (event == NETDEV_UNREGISTER) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 823463adbd21..189e3c5b3d09 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -231,14 +231,14 @@ override: } if (R_tab) { police->rate_present = true; - psched_ratecfg_precompute(&police->rate, R_tab->rate.rate); + psched_ratecfg_precompute(&police->rate, &R_tab->rate); qdisc_put_rtab(R_tab); } else { police->rate_present = false; } if (P_tab) { police->peak_present = true; - psched_ratecfg_precompute(&police->peak, P_tab->rate.rate); + psched_ratecfg_precompute(&police->peak, &P_tab->rate); qdisc_put_rtab(P_tab); } else { police->peak_present = false; @@ -376,9 +376,9 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) }; if (police->rate_present) - opt.rate.rate = psched_ratecfg_getrate(&police->rate); + psched_ratecfg_getrate(&opt.rate, &police->rate); if (police->peak_present) - opt.peakrate.rate = psched_ratecfg_getrate(&police->peak); + psched_ratecfg_getrate(&opt.peakrate, &police->peak); if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (police->tcfp_result && diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index eac7e0ee23c1..20224086cc28 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -898,14 +898,16 @@ void dev_shutdown(struct net_device *dev) WARN_ON(timer_pending(&dev->watchdog_timer)); } -void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) +void psched_ratecfg_precompute(struct psched_ratecfg *r, + const struct tc_ratespec *conf) { u64 factor; u64 mult; int shift; - r->rate_bps = (u64)rate << 3; - r->shift = 0; + memset(r, 0, sizeof(*r)); + r->overhead = conf->overhead; + r->rate_bps = (u64)conf->rate << 3; r->mult = 1; /* * Calibrate mult, shift so that token counting is accurate diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 79b1876b6cd2..adaedd79389c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -109,7 +109,7 @@ struct htb_class { } un; struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ struct rb_node pq_node; /* node for event queue */ - psched_time_t pq_key; + s64 pq_key; int prio_activity; /* for which prios are we active */ enum htb_cmode cmode; /* current mode of the class */ @@ -121,10 +121,10 @@ struct htb_class { /* token bucket parameters */ struct psched_ratecfg rate; struct psched_ratecfg ceil; - s64 buffer, cbuffer; /* token bucket depth/rate */ - psched_tdiff_t mbuffer; /* max wait time */ - s64 tokens, ctokens; /* current number of tokens */ - psched_time_t t_c; /* checkpoint time */ + s64 buffer, cbuffer; /* token bucket depth/rate */ + s64 mbuffer; /* max wait time */ + s64 tokens, ctokens; /* current number of tokens */ + s64 t_c; /* checkpoint time */ }; struct htb_sched { @@ -141,15 +141,15 @@ struct htb_sched { struct rb_root wait_pq[TC_HTB_MAXDEPTH]; /* time of nearest event per level (row) */ - psched_time_t near_ev_cache[TC_HTB_MAXDEPTH]; + s64 near_ev_cache[TC_HTB_MAXDEPTH]; int defcls; /* class where unclassified flows go to */ /* filters for qdisc itself */ struct tcf_proto *filter_list; - int rate2quantum; /* quant = rate / rate2quantum */ - psched_time_t now; /* cached dequeue time */ + int rate2quantum; /* quant = rate / rate2quantum */ + s64 now; /* cached dequeue time */ struct qdisc_watchdog watchdog; /* non shaped skbs; let them go directly thru */ @@ -664,8 +664,8 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq, q->now for too many events). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static psched_time_t htb_do_events(struct htb_sched *q, int level, - unsigned long start) +static s64 htb_do_events(struct htb_sched *q, int level, + unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of * 1 to simplify things when jiffy is going to be incremented @@ -857,7 +857,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct sk_buff *skb; struct htb_sched *q = qdisc_priv(sch); int level; - psched_time_t next_event; + s64 next_event; unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ @@ -880,7 +880,7 @@ ok: for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; - psched_time_t event; + s64 event; if (q->now >= q->near_ev_cache[level]) { event = htb_do_events(q, level, start_at); @@ -1090,9 +1090,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate.rate = psched_ratecfg_getrate(&cl->rate); + psched_ratecfg_getrate(&opt.rate, &cl->rate); opt.buffer = PSCHED_NS2TICKS(cl->buffer); - opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil); + psched_ratecfg_getrate(&opt.ceil, &cl->ceil); opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); opt.quantum = cl->quantum; opt.prio = cl->prio; @@ -1117,8 +1117,8 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) if (!cl->level && cl->un.leaf.q) cl->qstats.qlen = cl->un.leaf.q->q.qlen; - cl->xstats.tokens = cl->tokens; - cl->xstats.ctokens = cl->ctokens; + cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens); + cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens); if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || @@ -1200,7 +1200,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->un.leaf.q = new_q ? new_q : &noop_qdisc; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; - parent->t_c = psched_get_time(); + parent->t_c = ktime_to_ns(ktime_get()); parent->cmode = HTB_CAN_SEND; } @@ -1417,8 +1417,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* set class to be in HTB_CAN_SEND state */ cl->tokens = PSCHED_TICKS2NS(hopt->buffer); cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); - cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */ - cl->t_c = psched_get_time(); + cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */ + cl->t_c = ktime_to_ns(ktime_get()); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ @@ -1459,8 +1459,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - psched_ratecfg_precompute(&cl->rate, hopt->rate.rate); - psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate); + psched_ratecfg_precompute(&cl->rate, &hopt->rate); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 38008b0980d9..1aaf1b6e51a2 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -341,9 +341,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - psched_ratecfg_precompute(&q->rate, rtab->rate.rate); + psched_ratecfg_precompute(&q->rate, &rtab->rate); if (ptab) { - psched_ratecfg_precompute(&q->peak, ptab->rate.rate); + psched_ratecfg_precompute(&q->peak, &ptab->rate); q->peak_present = true; } else { q->peak_present = false; @@ -393,9 +393,9 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; opt.limit = q->limit; - opt.rate.rate = psched_ratecfg_getrate(&q->rate); + psched_ratecfg_getrate(&opt.rate, &q->rate); if (q->peak_present) - opt.peakrate.rate = psched_ratecfg_getrate(&q->peak); + psched_ratecfg_getrate(&opt.peakrate, &q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); opt.mtu = PSCHED_NS2TICKS(q->mtu); diff --git a/net/sctp/input.c b/net/sctp/input.c index 4b2c83146aa7..6533d81a638d 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct sctp_association *asoc = NULL; struct sctp_transport *transport; struct inet_sock *inet; - sk_buff_data_t saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 391a245d5203..8ee553b499ce 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -153,7 +153,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sctp_association *asoc; struct sctp_transport *transport; struct ipv6_pinfo *np; - sk_buff_data_t saveip, savesctp; + __be16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); diff --git a/net/socket.c b/net/socket.c index 734194d36242..1b930074a292 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2075,8 +2075,12 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, fla { int fput_needed, err; struct msghdr msg_sys; - struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + struct socket *sock; + + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -2149,6 +2153,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; return __sys_sendmmsg(fd, mmsg, vlen, flags); } @@ -2249,8 +2255,12 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, { int fput_needed, err; struct msghdr msg_sys; - struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + struct socket *sock; + + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -2375,6 +2385,9 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, int datagrams; struct timespec timeout_sys; + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + if (!timeout) return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); @@ -2492,15 +2505,31 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) (int __user *)a[4]); break; case SYS_SENDMSG: + if (a[2] & MSG_CMSG_COMPAT) { + err = -EINVAL; + break; + } err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); break; case SYS_SENDMMSG: + if (a[3] & MSG_CMSG_COMPAT) { + err = -EINVAL; + break; + } err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); break; case SYS_RECVMSG: + if (a[2] & MSG_CMSG_COMPAT) { + err = -EINVAL; + break; + } err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; case SYS_RECVMMSG: + if (a[3] & MSG_CMSG_COMPAT) { + err = -EINVAL; + break; + } err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], (struct timespec __user *)a[4]); break; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7da6b457f66a..fc2f78d6a9b4 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -52,6 +52,8 @@ #include <linux/sunrpc/gss_api.h> #include <asm/uaccess.h> +#include "../netns.h" + static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; @@ -85,8 +87,6 @@ struct gss_auth { }; /* pipe_version >= 0 if and only if someone has a pipe open. */ -static int pipe_version = -1; -static atomic_t pipe_users = ATOMIC_INIT(0); static DEFINE_SPINLOCK(pipe_version_lock); static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); @@ -266,24 +266,27 @@ struct gss_upcall_msg { char databuf[UPCALL_BUF_LEN]; }; -static int get_pipe_version(void) +static int get_pipe_version(struct net *net) { + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret; spin_lock(&pipe_version_lock); - if (pipe_version >= 0) { - atomic_inc(&pipe_users); - ret = pipe_version; + if (sn->pipe_version >= 0) { + atomic_inc(&sn->pipe_users); + ret = sn->pipe_version; } else ret = -EAGAIN; spin_unlock(&pipe_version_lock); return ret; } -static void put_pipe_version(void) +static void put_pipe_version(struct net *net) { - if (atomic_dec_and_lock(&pipe_users, &pipe_version_lock)) { - pipe_version = -1; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + if (atomic_dec_and_lock(&sn->pipe_users, &pipe_version_lock)) { + sn->pipe_version = -1; spin_unlock(&pipe_version_lock); } } @@ -291,9 +294,10 @@ static void put_pipe_version(void) static void gss_release_msg(struct gss_upcall_msg *gss_msg) { + struct net *net = rpc_net_ns(gss_msg->auth->client); if (!atomic_dec_and_test(&gss_msg->count)) return; - put_pipe_version(); + put_pipe_version(net); BUG_ON(!list_empty(&gss_msg->list)); if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); @@ -439,7 +443,10 @@ static void gss_encode_msg(struct gss_upcall_msg *gss_msg, struct rpc_clnt *clnt, const char *service_name) { - if (pipe_version == 0) + struct net *net = rpc_net_ns(clnt); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + if (sn->pipe_version == 0) gss_encode_v0_msg(gss_msg); else /* pipe_version == 1 */ gss_encode_v1_msg(gss_msg, clnt, service_name); @@ -455,7 +462,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg == NULL) return ERR_PTR(-ENOMEM); - vers = get_pipe_version(); + vers = get_pipe_version(rpc_net_ns(clnt)); if (vers < 0) { kfree(gss_msg); return ERR_PTR(vers); @@ -559,24 +566,34 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { + struct net *net = rpc_net_ns(gss_auth->client); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; + unsigned long timeout; DEFINE_WAIT(wait); - int err = 0; + int err; dprintk("RPC: %s for uid %u\n", __func__, from_kuid(&init_user_ns, cred->cr_uid)); retry: + err = 0; + /* Default timeout is 15s unless we know that gssd is not running */ + timeout = 15 * HZ; + if (!sn->gssd_running) + timeout = HZ >> 2; gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { err = wait_event_interruptible_timeout(pipe_version_waitqueue, - pipe_version >= 0, 15*HZ); - if (pipe_version < 0) { + sn->pipe_version >= 0, timeout); + if (sn->pipe_version < 0) { + if (err == 0) + sn->gssd_running = 0; warn_gssd(); err = -EACCES; } - if (err) + if (err < 0) goto out; goto retry; } @@ -707,20 +724,22 @@ out: static int gss_pipe_open(struct inode *inode, int new_version) { + struct net *net = inode->i_sb->s_fs_info; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret = 0; spin_lock(&pipe_version_lock); - if (pipe_version < 0) { + if (sn->pipe_version < 0) { /* First open of any gss pipe determines the version: */ - pipe_version = new_version; + sn->pipe_version = new_version; rpc_wake_up(&pipe_version_rpc_waitqueue); wake_up(&pipe_version_waitqueue); - } else if (pipe_version != new_version) { + } else if (sn->pipe_version != new_version) { /* Trying to open a pipe of a different version */ ret = -EBUSY; goto out; } - atomic_inc(&pipe_users); + atomic_inc(&sn->pipe_users); out: spin_unlock(&pipe_version_lock); return ret; @@ -740,6 +759,7 @@ static int gss_pipe_open_v1(struct inode *inode) static void gss_pipe_release(struct inode *inode) { + struct net *net = inode->i_sb->s_fs_info; struct rpc_pipe *pipe = RPC_I(inode)->pipe; struct gss_upcall_msg *gss_msg; @@ -758,7 +778,7 @@ restart: } spin_unlock(&pipe->lock); - put_pipe_version(); + put_pipe_version(net); } static void diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 871c73c92165..29b4ba93ab3c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1287,7 +1287,7 @@ static bool use_gss_proxy(struct net *net) #ifdef CONFIG_PROC_FS -static bool set_gss_proxy(struct net *net, int type) +static int set_gss_proxy(struct net *net, int type) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret = 0; @@ -1317,10 +1317,12 @@ static inline bool gssp_ready(struct sunrpc_net *sn) return false; } -static int wait_for_gss_proxy(struct net *net) +static int wait_for_gss_proxy(struct net *net, struct file *file) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + if (file->f_flags & O_NONBLOCK && !gssp_ready(sn)) + return -EAGAIN; return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn)); } @@ -1362,7 +1364,7 @@ static ssize_t read_gssp(struct file *file, char __user *buf, size_t len; int ret; - ret = wait_for_gss_proxy(net); + ret = wait_for_gss_proxy(net, file); if (ret) return ret; diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 7111a4c9113b..74d948f5d5a1 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -28,7 +28,11 @@ struct sunrpc_net { wait_queue_head_t gssp_wq; struct rpc_clnt *gssp_clnt; int use_gss_proxy; + int pipe_version; + atomic_t pipe_users; struct proc_dir_entry *use_gssp_proc; + + unsigned int gssd_running; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a9129f8d7070..e7ce4b3eb0bd 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -216,11 +216,14 @@ rpc_destroy_inode(struct inode *inode) static int rpc_pipe_open(struct inode *inode, struct file *filp) { + struct net *net = inode->i_sb->s_fs_info; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe; int first_open; int res = -ENXIO; mutex_lock(&inode->i_mutex); + sn->gssd_running = 1; pipe = RPC_I(inode)->pipe; if (pipe == NULL) goto out; @@ -1069,6 +1072,8 @@ void rpc_pipefs_init_net(struct net *net) struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_init(&sn->pipefs_sb_lock); + sn->gssd_running = 1; + sn->pipe_version = -1; } /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f8529fc8e542..5356b120dbf8 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -324,11 +324,17 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); * Note: If the task is ASYNC, and is being made runnable after sitting on an * rpc_wait_queue, this must be called with the queue spinlock held to protect * the wait queue operation. + * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(), + * which is needed to ensure that __rpc_execute() doesn't loop (due to the + * lockless RPC_IS_QUEUED() test) before we've had a chance to test + * the RPC_TASK_RUNNING flag. */ static void rpc_make_runnable(struct rpc_task *task) { + bool need_wakeup = !rpc_test_and_set_running(task); + rpc_clear_queued(task); - if (rpc_test_and_set_running(task)) + if (!need_wakeup) return; if (RPC_IS_ASYNC(task)) { INIT_WORK(&task->u.tk_work, rpc_async_schedule); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index c3f9e1ef7f53..06bdf5a1082c 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -810,11 +810,15 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) goto badcred; argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ argv->iov_len -= slen*4; - + /* + * Note: we skip uid_valid()/gid_valid() checks here for + * backwards compatibility with clients that use -1 id's. + * Instead, -1 uid or gid is later mapped to the + * (export-specific) anonymous id by nfsd_setuser. + * Supplementary gid's will be left alone. + */ cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */ cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */ - if (!uid_valid(cred->cr_uid) || !gid_valid(cred->cr_gid)) - goto badcred; slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) goto badcred; @@ -823,8 +827,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) return SVC_CLOSE; for (i = 0; i < slen; i++) { kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); - if (!gid_valid(kgid)) - goto badcred; GROUP_AT(cred->cr_group_info, i) = kgid; } if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 120a676a3360..fc60bea63169 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -251,9 +251,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 2a2864c25e15..baa9df4327d9 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -244,9 +244,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; diff --git a/net/wireless/core.c b/net/wireless/core.c index 84c9ad7e1dca..01e41191f1bf 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -638,17 +638,21 @@ int wiphy_register(struct wiphy *wiphy) * cfg80211_mutex lock */ res = rfkill_register(rdev->rfkill); - if (res) - goto out_rm_dev; + if (res) { + device_del(&rdev->wiphy.dev); + + mutex_lock(&cfg80211_mutex); + debugfs_remove_recursive(rdev->wiphy.debugfsdir); + list_del_rcu(&rdev->list); + wiphy_regulatory_deregister(wiphy); + mutex_unlock(&cfg80211_mutex); + return res; + } rtnl_lock(); rdev->wiphy.registered = true; rtnl_unlock(); return 0; - -out_rm_dev: - device_del(&rdev->wiphy.dev); - return res; } EXPORT_SYMBOL(wiphy_register); @@ -866,7 +870,6 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, #endif __cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); - cfg80211_mlme_down(rdev, dev); wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: @@ -883,10 +886,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; int ret; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index afa283841e8c..d5aed3bb3945 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3411,7 +3411,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, (u32)sinfo->rx_bytes)) goto nla_put_failure; if ((sinfo->filled & (STATION_INFO_TX_BYTES | - NL80211_STA_INFO_TX_BYTES64)) && + STATION_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, (u32)sinfo->tx_bytes)) goto nla_put_failure; @@ -7577,6 +7577,8 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, &tcp->payload_tok)) return -ENOBUFS; + nla_nest_end(msg, nl_tcp); + return 0; } @@ -9970,6 +9972,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) || @@ -10010,6 +10013,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || nla_put(msg, NL80211_ATTR_FRAME, len, buf) || nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie) || (ack && nla_put_flag(msg, NL80211_ATTR_ACK))) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index a9dc5c736df0..3ed35c345cae 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -231,6 +231,9 @@ void cfg80211_conn_work(struct work_struct *work) mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { + if (!wdev->netdev) + continue; + wdev_lock(wdev); if (!netif_running(wdev->netdev)) { wdev_unlock(wdev); @@ -961,7 +964,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, /* was it connected by userspace SME? */ if (!wdev->conn) { cfg80211_mlme_down(rdev, dev); - return 0; + goto disconnect; } if (wdev->sme_state == CFG80211_SME_CONNECTING && @@ -987,6 +990,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, return err; } + disconnect: if (wdev->sme_state == CFG80211_SME_CONNECTED) __cfg80211_disconnected(dev, NULL, 0, 0, false); else if (wdev->sme_state == CFG80211_SME_CONNECTING) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ecd4fcec3c94..5755bc14abbd 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2441,6 +2441,7 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup, TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY + __field(bool, non_wireless) __field(bool, disconnect) __field(bool, magic_pkt) __field(bool, gtk_rekey_failure) @@ -2449,20 +2450,22 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup, __field(bool, rfkill_release) __field(s32, pattern_idx) __field(u32, packet_len) - __dynamic_array(u8, packet, wakeup->packet_present_len) + __dynamic_array(u8, packet, + wakeup ? wakeup->packet_present_len : 0) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; - __entry->disconnect = wakeup->disconnect; - __entry->magic_pkt = wakeup->magic_pkt; - __entry->gtk_rekey_failure = wakeup->gtk_rekey_failure; - __entry->eap_identity_req = wakeup->eap_identity_req; - __entry->four_way_handshake = wakeup->four_way_handshake; - __entry->rfkill_release = wakeup->rfkill_release; - __entry->pattern_idx = wakeup->pattern_idx; - __entry->packet_len = wakeup->packet_len; - if (wakeup->packet && wakeup->packet_present_len) + __entry->non_wireless = !wakeup; + __entry->disconnect = wakeup ? wakeup->disconnect : false; + __entry->magic_pkt = wakeup ? wakeup->magic_pkt : false; + __entry->gtk_rekey_failure = wakeup ? wakeup->gtk_rekey_failure : false; + __entry->eap_identity_req = wakeup ? wakeup->eap_identity_req : false; + __entry->four_way_handshake = wakeup ? wakeup->four_way_handshake : false; + __entry->rfkill_release = wakeup ? wakeup->rfkill_release : false; + __entry->pattern_idx = wakeup ? wakeup->pattern_idx : false; + __entry->packet_len = wakeup ? wakeup->packet_len : false; + if (wakeup && wakeup->packet && wakeup->packet_present_len) memcpy(__get_dynamic_array(packet), wakeup->packet, wakeup->packet_present_len); ), diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 37ca9694aabe..1d964e23853f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -224,7 +224,7 @@ static void x25_kill_by_device(struct net_device *dev) static int x25_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct x25_neigh *nb; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index bcfda8921b5b..eb4a84288648 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -64,6 +64,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) if (unlikely(x->km.state != XFRM_STATE_VALID)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + err = -EINVAL; goto error; } @@ -88,7 +89,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) err = x->type->output(x, skb); if (err == -EINPROGRESS) - goto out_exit; + goto out; resume: if (err) { @@ -106,15 +107,14 @@ resume: x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); - err = 0; + return 0; -out_exit: - return err; error: spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); - goto out_exit; +out: + return err; } int xfrm_output_resume(struct sk_buff *skb, int err) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 23cea0f74336..e52cab3591dd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2557,11 +2557,12 @@ static void __xfrm_garbage_collect(struct net *net) } } -static void xfrm_garbage_collect(struct net *net) +void xfrm_garbage_collect(struct net *net) { flow_cache_flush(); __xfrm_garbage_collect(net); } +EXPORT_SYMBOL(xfrm_garbage_collect); static void xfrm_garbage_collect_deferred(struct net *net) { @@ -2784,7 +2785,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net) static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_DOWN: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index aa778748c565..3f565e495ac6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1681,6 +1681,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, out: xfrm_pol_put(xp); + if (delete && err == 0) + xfrm_garbage_collect(net); return err; } |