diff options
Diffstat (limited to 'net')
118 files changed, 4529 insertions, 1319 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8ccee3d01822..5e9950453955 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -647,13 +647,14 @@ out: return err; } -static struct sk_buff **vlan_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *vlan_gro_receive(struct list_head *head, + struct sk_buff *skb) { - struct sk_buff *p, **pp = NULL; - struct vlan_hdr *vhdr; - unsigned int hlen, off_vlan; const struct packet_offload *ptype; + unsigned int hlen, off_vlan; + struct sk_buff *pp = NULL; + struct vlan_hdr *vhdr; + struct sk_buff *p; __be16 type; int flush = 1; @@ -675,7 +676,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head, flush = 0; - for (p = *head; p; p = p->next) { + list_for_each_entry(p, head, list) { struct vlan_hdr *vhdr2; if (!NAPI_GRO_CB(p)->same_flow) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index a2de5a44bd41..ff9659af6b91 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1449,7 +1449,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) * detection frames. Set the locally administered bit to avoid * collisions with users mac addresses. */ - random_ether_addr(bat_priv->bla.loopdetect_addr); + eth_random_addr(bat_priv->bla.loopdetect_addr); bat_priv->bla.loopdetect_addr[0] = 0xba; bat_priv->bla.loopdetect_addr[1] = 0xbe; bat_priv->bla.loopdetect_lasttime = jiffies; diff --git a/net/core/dev.c b/net/core/dev.c index a5aa1c7444e6..08d58e0debe5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2081,6 +2081,10 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) EXPORT_SYMBOL(netdev_txq_to_tc); #ifdef CONFIG_XPS +struct static_key xps_needed __read_mostly; +EXPORT_SYMBOL(xps_needed); +struct static_key xps_rxqs_needed __read_mostly; +EXPORT_SYMBOL(xps_rxqs_needed); static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) @@ -2092,7 +2096,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps, int pos; if (dev_maps) - map = xmap_dereference(dev_maps->cpu_map[tci]); + map = xmap_dereference(dev_maps->attr_map[tci]); if (!map) return false; @@ -2105,7 +2109,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps, break; } - RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL); + RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); kfree_rcu(map, rcu); return false; } @@ -2135,33 +2139,68 @@ static bool remove_xps_queue_cpu(struct net_device *dev, return active; } +static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, + struct xps_dev_maps *dev_maps, unsigned int nr_ids, + u16 offset, u16 count, bool is_rxqs_map) +{ + bool active = false; + int i, j; + + for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), + j < nr_ids;) + active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, + count); + if (!active) { + if (is_rxqs_map) { + RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); + } else { + RCU_INIT_POINTER(dev->xps_cpus_map, NULL); + + for (i = offset + (count - 1); count--; i--) + netdev_queue_numa_node_write( + netdev_get_tx_queue(dev, i), + NUMA_NO_NODE); + } + kfree_rcu(dev_maps, rcu); + } +} + static void netif_reset_xps_queues(struct net_device *dev, u16 offset, u16 count) { + const unsigned long *possible_mask = NULL; struct xps_dev_maps *dev_maps; - int cpu, i; - bool active = false; + unsigned int nr_ids; + + if (!static_key_false(&xps_needed)) + return; mutex_lock(&xps_map_mutex); - dev_maps = xmap_dereference(dev->xps_maps); + if (static_key_false(&xps_rxqs_needed)) { + dev_maps = xmap_dereference(dev->xps_rxqs_map); + if (dev_maps) { + nr_ids = dev->num_rx_queues; + clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, + offset, count, true); + } + } + + dev_maps = xmap_dereference(dev->xps_cpus_map); if (!dev_maps) goto out_no_maps; - for_each_possible_cpu(cpu) - active |= remove_xps_queue_cpu(dev, dev_maps, cpu, - offset, count); - - if (!active) { - RCU_INIT_POINTER(dev->xps_maps, NULL); - kfree_rcu(dev_maps, rcu); - } - - for (i = offset + (count - 1); count--; i--) - netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), - NUMA_NO_NODE); + if (num_possible_cpus() > 1) + possible_mask = cpumask_bits(cpu_possible_mask); + nr_ids = nr_cpu_ids; + clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count, + false); out_no_maps: + if (static_key_enabled(&xps_rxqs_needed)) + static_key_slow_dec(&xps_rxqs_needed); + + static_key_slow_dec(&xps_needed); mutex_unlock(&xps_map_mutex); } @@ -2170,8 +2209,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) netif_reset_xps_queues(dev, index, dev->num_tx_queues - index); } -static struct xps_map *expand_xps_map(struct xps_map *map, - int cpu, u16 index) +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index, + u16 index, bool is_rxqs_map) { struct xps_map *new_map; int alloc_len = XPS_MIN_MAP_ALLOC; @@ -2183,7 +2222,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map, return map; } - /* Need to add queue to this CPU's existing map */ + /* Need to add tx-queue to this CPU's/rx-queue's existing map */ if (map) { if (pos < map->alloc_len) return map; @@ -2191,9 +2230,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map, alloc_len = map->alloc_len * 2; } - /* Need to allocate new map to store queue on this CPU's map */ - new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, - cpu_to_node(cpu)); + /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's + * map + */ + if (is_rxqs_map) + new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL); + else + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, + cpu_to_node(attr_index)); if (!new_map) return NULL; @@ -2205,14 +2249,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map, return new_map; } -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, - u16 index) +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, + u16 index, bool is_rxqs_map) { + const unsigned long *online_mask = NULL, *possible_mask = NULL; struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; - int i, cpu, tci, numa_node_id = -2; + int i, j, tci, numa_node_id = -2; int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; bool active = false; + unsigned int nr_ids; if (dev->num_tc) { num_tc = dev->num_tc; @@ -2221,16 +2267,27 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, return -EINVAL; } - maps_sz = XPS_DEV_MAPS_SIZE(num_tc); - if (maps_sz < L1_CACHE_BYTES) - maps_sz = L1_CACHE_BYTES; - mutex_lock(&xps_map_mutex); + if (is_rxqs_map) { + maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues); + dev_maps = xmap_dereference(dev->xps_rxqs_map); + nr_ids = dev->num_rx_queues; + } else { + maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc); + if (num_possible_cpus() > 1) { + online_mask = cpumask_bits(cpu_online_mask); + possible_mask = cpumask_bits(cpu_possible_mask); + } + dev_maps = xmap_dereference(dev->xps_cpus_map); + nr_ids = nr_cpu_ids; + } - dev_maps = xmap_dereference(dev->xps_maps); + if (maps_sz < L1_CACHE_BYTES) + maps_sz = L1_CACHE_BYTES; /* allocate memory for queue storage */ - for_each_cpu_and(cpu, cpu_online_mask, mask) { + for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids), + j < nr_ids;) { if (!new_dev_maps) new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { @@ -2238,73 +2295,85 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, return -ENOMEM; } - tci = cpu * num_tc + tc; - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) : + tci = j * num_tc + tc; + map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) : NULL; - map = expand_xps_map(map, cpu, index); + map = expand_xps_map(map, j, index, is_rxqs_map); if (!map) goto error; - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); } if (!new_dev_maps) goto out_no_new_maps; - for_each_possible_cpu(cpu) { + static_key_slow_inc(&xps_needed); + if (is_rxqs_map) + static_key_slow_inc(&xps_rxqs_needed); + + for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), + j < nr_ids;) { /* copy maps belonging to foreign traffic classes */ - for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) { + for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) { /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->cpu_map[tci]); - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + map = xmap_dereference(dev_maps->attr_map[tci]); + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); } /* We need to explicitly update tci as prevous loop * could break out early if dev_maps is NULL. */ - tci = cpu * num_tc + tc; + tci = j * num_tc + tc; - if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { - /* add queue to CPU maps */ + if (netif_attr_test_mask(j, mask, nr_ids) && + netif_attr_test_online(j, online_mask, nr_ids)) { + /* add tx-queue to CPU/rx-queue maps */ int pos = 0; - map = xmap_dereference(new_dev_maps->cpu_map[tci]); + map = xmap_dereference(new_dev_maps->attr_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; if (pos == map->len) map->queues[map->len++] = index; #ifdef CONFIG_NUMA - if (numa_node_id == -2) - numa_node_id = cpu_to_node(cpu); - else if (numa_node_id != cpu_to_node(cpu)) - numa_node_id = -1; + if (!is_rxqs_map) { + if (numa_node_id == -2) + numa_node_id = cpu_to_node(j); + else if (numa_node_id != cpu_to_node(j)) + numa_node_id = -1; + } #endif } else if (dev_maps) { /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->cpu_map[tci]); - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + map = xmap_dereference(dev_maps->attr_map[tci]); + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); } /* copy maps belonging to foreign traffic classes */ for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->cpu_map[tci]); - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + map = xmap_dereference(dev_maps->attr_map[tci]); + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); } } - rcu_assign_pointer(dev->xps_maps, new_dev_maps); + if (is_rxqs_map) + rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps); + else + rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps); /* Cleanup old maps */ if (!dev_maps) goto out_no_old_maps; - for_each_possible_cpu(cpu) { - for (i = num_tc, tci = cpu * num_tc; i--; tci++) { - new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); - map = xmap_dereference(dev_maps->cpu_map[tci]); + for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), + j < nr_ids;) { + for (i = num_tc, tci = j * num_tc; i--; tci++) { + new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + map = xmap_dereference(dev_maps->attr_map[tci]); if (map && map != new_map) kfree_rcu(map, rcu); } @@ -2317,19 +2386,23 @@ out_no_old_maps: active = true; out_no_new_maps: - /* update Tx queue numa node */ - netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), - (numa_node_id >= 0) ? numa_node_id : - NUMA_NO_NODE); + if (!is_rxqs_map) { + /* update Tx queue numa node */ + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), + (numa_node_id >= 0) ? + numa_node_id : NUMA_NO_NODE); + } if (!dev_maps) goto out_no_maps; - /* removes queue from unused CPUs */ - for_each_possible_cpu(cpu) { - for (i = tc, tci = cpu * num_tc; i--; tci++) + /* removes tx-queue from unused CPUs/rx-queues */ + for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), + j < nr_ids;) { + for (i = tc, tci = j * num_tc; i--; tci++) active |= remove_xps_queue(dev_maps, tci, index); - if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu)) + if (!netif_attr_test_mask(j, mask, nr_ids) || + !netif_attr_test_online(j, online_mask, nr_ids)) active |= remove_xps_queue(dev_maps, tci, index); for (i = num_tc - tc, tci++; --i; tci++) active |= remove_xps_queue(dev_maps, tci, index); @@ -2337,7 +2410,10 @@ out_no_new_maps: /* free map if not active */ if (!active) { - RCU_INIT_POINTER(dev->xps_maps, NULL); + if (is_rxqs_map) + RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); + else + RCU_INIT_POINTER(dev->xps_cpus_map, NULL); kfree_rcu(dev_maps, rcu); } @@ -2347,11 +2423,12 @@ out_no_maps: return 0; error: /* remove any maps that we added */ - for_each_possible_cpu(cpu) { - for (i = num_tc, tci = cpu * num_tc; i--; tci++) { - new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); + for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), + j < nr_ids;) { + for (i = num_tc, tci = j * num_tc; i--; tci++) { + new_map = xmap_dereference(new_dev_maps->attr_map[tci]); map = dev_maps ? - xmap_dereference(dev_maps->cpu_map[tci]) : + xmap_dereference(dev_maps->attr_map[tci]) : NULL; if (new_map && new_map != map) kfree(new_map); @@ -2363,6 +2440,12 @@ error: kfree(new_dev_maps); return -ENOMEM; } + +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, + u16 index) +{ + return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false); +} EXPORT_SYMBOL(netif_set_xps_queue); #endif @@ -3376,32 +3459,63 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) } #endif /* CONFIG_NET_EGRESS */ -static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +#ifdef CONFIG_XPS +static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb, + struct xps_dev_maps *dev_maps, unsigned int tci) +{ + struct xps_map *map; + int queue_index = -1; + + if (dev->num_tc) { + tci *= dev->num_tc; + tci += netdev_get_prio_tc_map(dev, skb->priority); + } + + map = rcu_dereference(dev_maps->attr_map[tci]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else + queue_index = map->queues[reciprocal_scale( + skb_get_hash(skb), map->len)]; + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + return queue_index; +} +#endif + +static int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS struct xps_dev_maps *dev_maps; - struct xps_map *map; + struct sock *sk = skb->sk; int queue_index = -1; + if (!static_key_false(&xps_needed)) + return -1; + rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_maps); + if (!static_key_false(&xps_rxqs_needed)) + goto get_cpus_map; + + dev_maps = rcu_dereference(dev->xps_rxqs_map); if (dev_maps) { - unsigned int tci = skb->sender_cpu - 1; + int tci = sk_rx_queue_get(sk); - if (dev->num_tc) { - tci *= dev->num_tc; - tci += netdev_get_prio_tc_map(dev, skb->priority); - } + if (tci >= 0 && tci < dev->num_rx_queues) + queue_index = __get_xps_queue_idx(dev, skb, dev_maps, + tci); + } - map = rcu_dereference(dev_maps->cpu_map[tci]); - if (map) { - if (map->len == 1) - queue_index = map->queues[0]; - else - queue_index = map->queues[reciprocal_scale(skb_get_hash(skb), - map->len)]; - if (unlikely(queue_index >= dev->real_num_tx_queues)) - queue_index = -1; +get_cpus_map: + if (queue_index < 0) { + dev_maps = rcu_dereference(dev->xps_cpus_map); + if (dev_maps) { + unsigned int tci = skb->sender_cpu - 1; + + queue_index = __get_xps_queue_idx(dev, skb, dev_maps, + tci); } } rcu_read_unlock(); @@ -4875,42 +4989,46 @@ out: return netif_receive_skb_internal(skb); } -/* napi->gro_list contains packets ordered by age. - * youngest packets at the head of it. - * Complete skbs in reverse order to reduce latencies. - */ -void napi_gro_flush(struct napi_struct *napi, bool flush_old) +static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head, + bool flush_old) { - struct sk_buff *skb, *prev = NULL; - - /* scan list and build reverse chain */ - for (skb = napi->gro_list; skb != NULL; skb = skb->next) { - skb->prev = prev; - prev = skb; - } - - for (skb = prev; skb; skb = prev) { - skb->next = NULL; + struct sk_buff *skb, *p; + list_for_each_entry_safe_reverse(skb, p, head, list) { if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) return; - - prev = skb->prev; + list_del_init(&skb->list); napi_gro_complete(skb); napi->gro_count--; } +} - napi->gro_list = NULL; +/* napi->gro_hash contains packets ordered by age. + * youngest packets at the head of it. + * Complete skbs in reverse order to reduce latencies. + */ +void napi_gro_flush(struct napi_struct *napi, bool flush_old) +{ + int i; + + for (i = 0; i < GRO_HASH_BUCKETS; i++) { + struct list_head *head = &napi->gro_hash[i]; + + __napi_gro_flush_chain(napi, head, flush_old); + } } EXPORT_SYMBOL(napi_gro_flush); -static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) +static struct list_head *gro_list_prepare(struct napi_struct *napi, + struct sk_buff *skb) { - struct sk_buff *p; unsigned int maclen = skb->dev->hard_header_len; u32 hash = skb_get_hash_raw(skb); + struct list_head *head; + struct sk_buff *p; - for (p = napi->gro_list; p; p = p->next) { + head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)]; + list_for_each_entry(p, head, list) { unsigned long diffs; NAPI_GRO_CB(p)->flush = 0; @@ -4933,6 +5051,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) maclen); NAPI_GRO_CB(p)->same_flow = !diffs; } + + return head; } static void skb_gro_reset_offset(struct sk_buff *skb) @@ -4975,20 +5095,54 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) } } +static void gro_flush_oldest(struct napi_struct *napi) +{ + struct sk_buff *oldest = NULL; + unsigned long age = jiffies; + int i; + + for (i = 0; i < GRO_HASH_BUCKETS; i++) { + struct list_head *head = &napi->gro_hash[i]; + struct sk_buff *skb; + + if (list_empty(head)) + continue; + + skb = list_last_entry(head, struct sk_buff, list); + if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) { + oldest = skb; + age = NAPI_GRO_CB(skb)->age; + } + } + + /* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is + * impossible. + */ + if (WARN_ON_ONCE(!oldest)) + return; + + /* Do not adjust napi->gro_count, caller is adding a new SKB to + * the chain. + */ + list_del(&oldest->list); + napi_gro_complete(oldest); +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - struct sk_buff **pp = NULL; + struct list_head *head = &offload_base; struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &offload_base; - int same_flow; + struct list_head *gro_head; + struct sk_buff *pp = NULL; enum gro_result ret; + int same_flow; int grow; if (netif_elide_gro(skb->dev)) goto normal; - gro_list_prepare(napi, skb); + gro_head = gro_list_prepare(napi, skb); rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -5022,7 +5176,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->csum_valid = 0; } - pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); + pp = ptype->callbacks.gro_receive(gro_head, skb); break; } rcu_read_unlock(); @@ -5039,11 +5193,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; if (pp) { - struct sk_buff *nskb = *pp; - - *pp = nskb->next; - nskb->next = NULL; - napi_gro_complete(nskb); + list_del_init(&pp->list); + napi_gro_complete(pp); napi->gro_count--; } @@ -5054,16 +5205,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { - struct sk_buff *nskb = napi->gro_list; - - /* locate the end of the list to select the 'oldest' flow */ - while (nskb->next) { - pp = &nskb->next; - nskb = *pp; - } - *pp = NULL; - nskb->next = NULL; - napi_gro_complete(nskb); + gro_flush_oldest(napi); } else { napi->gro_count++; } @@ -5071,8 +5213,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); - skb->next = napi->gro_list; - napi->gro_list = skb; + list_add(&skb->list, gro_head); ret = GRO_HELD; pull: @@ -5478,7 +5619,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; - if (n->gro_list) { + if (n->gro_count) { unsigned long timeout = 0; if (work_done) @@ -5687,7 +5828,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) /* Note : we use a relaxed variant of napi_schedule_prep() not setting * NAPI_STATE_MISSED, since we do not react to a device IRQ. */ - if (napi->gro_list && !napi_disable_pending(napi) && + if (napi->gro_count && !napi_disable_pending(napi) && !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) __napi_schedule_irqoff(napi); @@ -5697,11 +5838,14 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { + int i; + INIT_LIST_HEAD(&napi->poll_list); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; napi->gro_count = 0; - napi->gro_list = NULL; + for (i = 0; i < GRO_HASH_BUCKETS; i++) + INIT_LIST_HEAD(&napi->gro_hash[i]); napi->skb = NULL; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) @@ -5734,6 +5878,18 @@ void napi_disable(struct napi_struct *n) } EXPORT_SYMBOL(napi_disable); +static void flush_gro_hash(struct napi_struct *napi) +{ + int i; + + for (i = 0; i < GRO_HASH_BUCKETS; i++) { + struct sk_buff *skb, *n; + + list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list) + kfree_skb(skb); + } +} + /* Must be called in process context */ void netif_napi_del(struct napi_struct *napi) { @@ -5743,8 +5899,7 @@ void netif_napi_del(struct napi_struct *napi) list_del_init(&napi->dev_list); napi_free_frags(napi); - kfree_skb_list(napi->gro_list); - napi->gro_list = NULL; + flush_gro_hash(napi); napi->gro_count = 0; } EXPORT_SYMBOL(netif_napi_del); @@ -5787,7 +5942,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) goto out_unlock; } - if (n->gro_list) { + if (n->gro_count) { /* flush too old packets * If HZ < 1000, flush all packets. */ diff --git a/net/core/filter.c b/net/core/filter.c index 0ca6907d7efe..547fd34589be 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3582,7 +3582,7 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, if (unlikely(size > IP_TUNNEL_OPTS_MAX)) return -ENOMEM; - ip_tunnel_info_opts_set(info, from, size); + ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT); return 0; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8e3fda9e725c..cbe85d8d4cc2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->nud_state = new; err = 0; notify = old & NUD_VALID; - if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && + if (((old & (NUD_INCOMPLETE | NUD_PROBE)) || + (flags & NEIGH_UPDATE_F_ADMIN)) && (new & NUD_FAILED)) { neigh_invalidate(neigh); notify = 1; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index bb7e80f4ced3..f25ac5ff48a6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, return -ENOMEM; rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_maps); + dev_maps = rcu_dereference(dev->xps_cpus_map); if (dev_maps) { for_each_possible_cpu(cpu) { int i, tci = cpu * num_tc + tc; struct xps_map *map; - map = rcu_dereference(dev_maps->cpu_map[tci]); + map = rcu_dereference(dev_maps->attr_map[tci]); if (!map) continue; @@ -1283,6 +1283,88 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init = __ATTR_RW(xps_cpus); + +static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) +{ + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + unsigned long *mask, index; + int j, len, num_tc = 1, tc = 0; + + index = get_netdev_queue_index(queue); + + if (dev->num_tc) { + num_tc = dev->num_tc; + tc = netdev_txq_to_tc(dev, index); + if (tc < 0) + return -EINVAL; + } + mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long), + GFP_KERNEL); + if (!mask) + return -ENOMEM; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_rxqs_map); + if (!dev_maps) + goto out_no_maps; + + for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues), + j < dev->num_rx_queues;) { + int i, tci = j * num_tc + tc; + struct xps_map *map; + + map = rcu_dereference(dev_maps->attr_map[tci]); + if (!map) + continue; + + for (i = map->len; i--;) { + if (map->queues[i] == index) { + set_bit(j, mask); + break; + } + } + } +out_no_maps: + rcu_read_unlock(); + + len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues); + kfree(mask); + + return len < PAGE_SIZE ? len : -EINVAL; +} + +static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, + size_t len) +{ + struct net_device *dev = queue->dev; + struct net *net = dev_net(dev); + unsigned long *mask, index; + int err; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long), + GFP_KERNEL); + if (!mask) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + err = bitmap_parse(buf, len, mask, dev->num_rx_queues); + if (err) { + kfree(mask); + return err; + } + + err = __netif_set_xps_queue(dev, mask, index, true); + kfree(mask); + return err ? : len; +} + +static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init + = __ATTR_RW(xps_rxqs); #endif /* CONFIG_XPS */ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = { @@ -1290,6 +1372,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = { &queue_traffic_class.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, + &xps_rxqs_attribute.attr, &queue_tx_maxrate.attr, #endif NULL diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eba8dae22c25..1357f36c8a5e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3815,14 +3815,14 @@ err: } EXPORT_SYMBOL_GPL(skb_segment); -int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) +int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) { struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); unsigned int len = skb_gro_len(skb); - struct sk_buff *lp, *p = *head; unsigned int delta_truesize; + struct sk_buff *lp; if (unlikely(p->len + len >= 65536)) return -E2BIG; @@ -4911,7 +4911,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) return; ipvs_reset(skb); - skb_orphan(skb); skb->mark = 0; } EXPORT_SYMBOL_GPL(skb_scrub_packet); diff --git a/net/core/sock.c b/net/core/sock.c index 9e8f65585b81..6429982eb976 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2401,9 +2401,10 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct proto *prot = sk->sk_prot; long allocated = sk_memory_allocated_add(sk, amt); + bool charged = true; if (mem_cgroup_sockets_enabled && sk->sk_memcg && - !mem_cgroup_charge_skmem(sk->sk_memcg, amt)) + !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt))) goto suppress_allocation; /* Under limit. */ @@ -2461,7 +2462,8 @@ suppress_allocation: return 1; } - trace_sock_exceed_buf_limit(sk, prot, allocated); + if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) + trace_sock_exceed_buf_limit(sk, prot, allocated, kind); sk_memory_allocated_sub(sk, amt); @@ -2818,6 +2820,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_pacing_rate = ~0U; sk->sk_pacing_shift = 10; sk->sk_incoming_cpu = -1; + + sk_rx_queue_clear(sk); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/xdp.c b/net/core/xdp.c index 9d1f22072d5d..31c58719b5a9 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -45,8 +45,8 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id) != sizeof(u32)); - /* Use cyclic increasing ID as direct hash key, see rht_bucket_index */ - return key << RHT_HASH_RESERVED_SPACE; + /* Use cyclic increasing ID as direct hash key */ + return key; } static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1e3b6a6d8a40..71536c435132 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -900,7 +900,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, switch (f->command) { case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, cb, dev, dev); + return tcf_block_cb_register(f->block, cb, dev, dev, f->extack); case TC_BLOCK_UNBIND: tcf_block_cb_unregister(f->block, cb, dev); return 0; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index ee28440f57c5..fd8faa0dfa61 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -427,13 +427,13 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) } EXPORT_SYMBOL(sysfs_format_mac); -struct sk_buff **eth_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb) { - struct sk_buff *p, **pp = NULL; - struct ethhdr *eh, *eh2; - unsigned int hlen, off_eth; const struct packet_offload *ptype; + unsigned int hlen, off_eth; + struct sk_buff *pp = NULL; + struct ethhdr *eh, *eh2; + struct sk_buff *p; __be16 type; int flush = 1; @@ -448,7 +448,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, flush = 0; - for (p = *head; p; p = p->next) { + list_for_each_entry(p, head, list) { if (!NAPI_GRO_CB(p)->same_flow) continue; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b403499fdabe..9263a2c114e0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1384,12 +1384,12 @@ out: } EXPORT_SYMBOL(inet_gso_segment); -struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) +struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct net_offload *ops; - struct sk_buff **pp = NULL; - struct sk_buff *p; + struct sk_buff *pp = NULL; const struct iphdr *iph; + struct sk_buff *p; unsigned int hlen; unsigned int off; unsigned int id; @@ -1425,7 +1425,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); id >>= 16; - for (p = *head; p; p = p->next) { + list_for_each_entry(p, head, list) { struct iphdr *iph2; u16 flush_id; @@ -1505,8 +1505,8 @@ out: } EXPORT_SYMBOL(inet_gro_receive); -static struct sk_buff **ipip_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *ipip_gro_receive(struct list_head *head, + struct sk_buff *skb) { if (NAPI_GRO_CB(skb)->encap_mark) { NAPI_GRO_CB(skb)->flush = 1; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 7cf755ef9efb..bbeecd13e534 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -28,8 +28,8 @@ #include <linux/spinlock.h> #include <net/udp.h> -static struct sk_buff **esp4_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *esp4_gro_receive(struct list_head *head, + struct sk_buff *skb) { int offset = skb_gro_offset(skb); struct xfrm_offload *xo; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index c9ec1603666b..500a59906b87 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -224,14 +224,14 @@ drop: return 0; } -static struct sk_buff **fou_gro_receive(struct sock *sk, - struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *fou_gro_receive(struct sock *sk, + struct list_head *head, + struct sk_buff *skb) { - const struct net_offload *ops; - struct sk_buff **pp = NULL; u8 proto = fou_from_sock(sk)->protocol; const struct net_offload **offloads; + const struct net_offload *ops; + struct sk_buff *pp = NULL; /* We can clear the encap_mark for FOU as we are essentially doing * one of two possible things. We are either adding an L4 tunnel @@ -305,13 +305,13 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return guehdr; } -static struct sk_buff **gue_gro_receive(struct sock *sk, - struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *gue_gro_receive(struct sock *sk, + struct list_head *head, + struct sk_buff *skb) { const struct net_offload **offloads; const struct net_offload *ops; - struct sk_buff **pp = NULL; + struct sk_buff *pp = NULL; struct sk_buff *p; struct guehdr *guehdr; size_t len, optlen, hdrlen, off; @@ -397,7 +397,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk, skb_gro_pull(skb, hdrlen); - for (p = *head; p; p = p->next) { + list_for_each_entry(p, head, list) { const struct guehdr *guehdr2; if (!NAPI_GRO_CB(p)->same_flow) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6a7d980105f6..6c63524f598a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -108,10 +108,10 @@ out: return segs; } -static struct sk_buff **gre_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *gre_gro_receive(struct list_head *head, + struct sk_buff *skb) { - struct sk_buff **pp = NULL; + struct sk_buff *pp = NULL; struct sk_buff *p; const struct gre_base_hdr *greh; unsigned int hlen, grehlen; @@ -182,7 +182,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, null_compute_pseudo); } - for (p = *head; p; p = p->next) { + list_for_each_entry(p, head, list) { const struct gre_base_hdr *greh2; if (!NAPI_GRO_CB(p)->same_flow) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9e35b81d093..316518f87294 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -20,6 +20,7 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/slab.h> +#include <linux/rhashtable.h> #include <net/sock.h> #include <net/inet_frag.h> diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2d8efeecf619..c8ca5d8f0f75 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -587,6 +587,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, goto err_free_skb; key = &tun_info->key; + if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) + goto err_free_rt; md = ip_tunnel_info_opts(tun_info); if (!md) goto err_free_rt; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9f79b9803a16..82f914122f1b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -60,6 +60,7 @@ #include <linux/netfilter_ipv4.h> #include <linux/compat.h> #include <linux/export.h> +#include <linux/rhashtable.h> #include <net/ip_tunnels.h> #include <net/checksum.h> #include <net/netlink.h> diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index cafb0506c8c9..1ad9aa62a97b 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -2,6 +2,7 @@ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation */ +#include <linux/rhashtable.h> #include <linux/mroute_base.h> /* Sets everything common except 'dev', since that is done under locking */ diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 4388de0e5380..1e6f28c97d3a 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -35,7 +35,7 @@ static const struct nf_loginfo default_loginfo = { }; /* One level of recursion won't kill us */ -static void dump_ipv4_packet(struct nf_log_buf *m, +static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int iphoff) { @@ -183,7 +183,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, /* Max length: 3+maxlen */ if (!iphoff) { /* Only recurse once. */ nf_log_buf_add(m, "["); - dump_ipv4_packet(m, info, skb, + dump_ipv4_packet(net, m, info, skb, iphoff + ih->ihl*4+sizeof(_icmph)); nf_log_buf_add(m, "] "); } @@ -251,7 +251,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, /* Max length: 15 "UID=4294967295 " */ if ((logflags & NF_LOG_UID) && !iphoff) - nf_log_dump_sk_uid_gid(m, skb->sk); + nf_log_dump_sk_uid_gid(net, m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!iphoff && skb->mark) @@ -333,7 +333,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, if (in != NULL) dump_ipv4_mac_header(m, loginfo, skb); - dump_ipv4_packet(m, loginfo, skb, 0); + dump_ipv4_packet(net, m, loginfo, skb, 0); nf_log_buf_close(m); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 77350c1256ce..b46e4cf9a55a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -287,6 +287,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED), SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE), SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), + SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), + SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e7b53d2a971f..bf461fa77ed6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2574,6 +2574,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); tp->srtt_us = 0; + tp->rcv_rtt_last_tsecr = 0; tp->write_seq += tp->max_window + 2; if (tp->write_seq == 0) tp->write_seq = 1; diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 58e2f479ffb4..3b5f45b9e81e 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -205,7 +205,11 @@ static u32 bbr_bw(const struct sock *sk) */ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) { - rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache); + unsigned int mss = tcp_sk(sk)->mss_cache; + + if (!tcp_needs_internal_pacing(sk)) + mss = tcp_mss_to_mtu(sk, mss); + rate *= mss; rate *= gain; rate >>= BBR_SCALE; rate *= USEC_PER_SEC; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8e5522c6833a..814ea43dd12f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -78,6 +78,7 @@ #include <linux/errqueue.h> #include <trace/events/tcp.h> #include <linux/static_key.h> +#include <net/busy_poll.h> int sysctl_tcp_max_orphans __read_mostly = NR_FILE; @@ -582,9 +583,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, { struct tcp_sock *tp = tcp_sk(sk); - if (tp->rx_opt.rcv_tsecr && - (TCP_SKB_CB(skb)->end_seq - - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { + if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr) + return; + tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr; + + if (TCP_SKB_CB(skb)->end_seq - + TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) { u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; u32 delta_us; @@ -4617,8 +4621,10 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) skb->data_len = data_len; skb->len = size; - if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); goto err_free; + } err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); if (err) @@ -4674,15 +4680,19 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) * Out of sequence packets to the out_of_order_queue. */ if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { - if (tcp_receive_window(tp) == 0) + if (tcp_receive_window(tp) == 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); goto out_of_window; + } /* Ok. In sequence. In window. */ queue_and_out: if (skb_queue_len(&sk->sk_receive_queue) == 0) sk_forced_mem_schedule(sk, skb->truesize); - else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); goto drop; + } eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); @@ -4741,8 +4751,10 @@ drop: /* If window is closed, drop tail of packet. But after * remembering D-SACK for its head made in previous line. */ - if (!tcp_receive_window(tp)) + if (!tcp_receive_window(tp)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); goto out_of_window; + } goto queue_and_out; } @@ -5484,6 +5496,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) tcp_ack(sk, skb, 0); __kfree_skb(skb); tcp_data_snd_check(sk); + /* When receiving pure ack in fast path, update + * last ts ecr directly instead of calling + * tcp_rcv_rtt_measure_ts() + */ + tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr; return; } else { /* Header too small */ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); @@ -5585,6 +5602,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); + sk_mark_napi_id(sk, skb); } tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); @@ -6413,6 +6431,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_openreq_init_rwin(req, sk, dst); + sk_rx_queue_set(req_to_sk(req), skb); if (!want_cookie) { tcp_reqsk_record_syn(sk, req, skb); fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1dda1341a223..dac5893a52b4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -449,119 +449,122 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, struct sk_buff *skb) { struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); + const struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_request_sock *treq = tcp_rsk(req); + struct inet_connection_sock *newicsk; + struct tcp_sock *oldtp, *newtp; - if (newsk) { - const struct inet_request_sock *ireq = inet_rsk(req); - struct tcp_request_sock *treq = tcp_rsk(req); - struct inet_connection_sock *newicsk = inet_csk(newsk); - struct tcp_sock *newtp = tcp_sk(newsk); - struct tcp_sock *oldtp = tcp_sk(sk); - - smc_check_reset_syn_req(oldtp, req, newtp); - - /* Now setup tcp_sock */ - newtp->pred_flags = 0; - - newtp->rcv_wup = newtp->copied_seq = - newtp->rcv_nxt = treq->rcv_isn + 1; - newtp->segs_in = 1; - - newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; - - INIT_LIST_HEAD(&newtp->tsq_node); - INIT_LIST_HEAD(&newtp->tsorted_sent_queue); - - tcp_init_wl(newtp, treq->rcv_isn); - - newtp->srtt_us = 0; - newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); - newicsk->icsk_rto = TCP_TIMEOUT_INIT; - newicsk->icsk_ack.lrcvtime = tcp_jiffies32; - - newtp->packets_out = 0; - newtp->retrans_out = 0; - newtp->sacked_out = 0; - newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - newtp->tlp_high_seq = 0; - newtp->lsndtime = tcp_jiffies32; - newsk->sk_txhash = treq->txhash; - newtp->last_oow_ack_time = 0; - newtp->total_retrans = req->num_retrans; - - /* So many TCP implementations out there (incorrectly) count the - * initial SYN frame in their delayed-ACK and congestion control - * algorithms that we must have the following bandaid to talk - * efficiently to them. -DaveM - */ - newtp->snd_cwnd = TCP_INIT_CWND; - newtp->snd_cwnd_cnt = 0; - - /* There's a bubble in the pipe until at least the first ACK. */ - newtp->app_limited = ~0U; - - tcp_init_xmit_timers(newsk); - newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; - - newtp->rx_opt.saw_tstamp = 0; - - newtp->rx_opt.dsack = 0; - newtp->rx_opt.num_sacks = 0; - - newtp->urg_data = 0; - - if (sock_flag(newsk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(newsk, - keepalive_time_when(newtp)); - - newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; - newtp->rx_opt.sack_ok = ireq->sack_ok; - newtp->window_clamp = req->rsk_window_clamp; - newtp->rcv_ssthresh = req->rsk_rcv_wnd; - newtp->rcv_wnd = req->rsk_rcv_wnd; - newtp->rx_opt.wscale_ok = ireq->wscale_ok; - if (newtp->rx_opt.wscale_ok) { - newtp->rx_opt.snd_wscale = ireq->snd_wscale; - newtp->rx_opt.rcv_wscale = ireq->rcv_wscale; - } else { - newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; - newtp->window_clamp = min(newtp->window_clamp, 65535U); - } - newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) << - newtp->rx_opt.snd_wscale); - newtp->max_window = newtp->snd_wnd; - - if (newtp->rx_opt.tstamp_ok) { - newtp->rx_opt.ts_recent = req->ts_recent; - newtp->rx_opt.ts_recent_stamp = get_seconds(); - newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; - } else { - newtp->rx_opt.ts_recent_stamp = 0; - newtp->tcp_header_len = sizeof(struct tcphdr); - } - newtp->tsoffset = treq->ts_off; + if (!newsk) + return NULL; + + newicsk = inet_csk(newsk); + newtp = tcp_sk(newsk); + oldtp = tcp_sk(sk); + + smc_check_reset_syn_req(oldtp, req, newtp); + + /* Now setup tcp_sock */ + newtp->pred_flags = 0; + + newtp->rcv_wup = newtp->copied_seq = + newtp->rcv_nxt = treq->rcv_isn + 1; + newtp->segs_in = 1; + + newtp->snd_sml = newtp->snd_una = + newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; + + INIT_LIST_HEAD(&newtp->tsq_node); + INIT_LIST_HEAD(&newtp->tsorted_sent_queue); + + tcp_init_wl(newtp, treq->rcv_isn); + + newtp->srtt_us = 0; + newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); + minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); + newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_ack.lrcvtime = tcp_jiffies32; + + newtp->packets_out = 0; + newtp->retrans_out = 0; + newtp->sacked_out = 0; + newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + newtp->tlp_high_seq = 0; + newtp->lsndtime = tcp_jiffies32; + newsk->sk_txhash = treq->txhash; + newtp->last_oow_ack_time = 0; + newtp->total_retrans = req->num_retrans; + + /* So many TCP implementations out there (incorrectly) count the + * initial SYN frame in their delayed-ACK and congestion control + * algorithms that we must have the following bandaid to talk + * efficiently to them. -DaveM + */ + newtp->snd_cwnd = TCP_INIT_CWND; + newtp->snd_cwnd_cnt = 0; + + /* There's a bubble in the pipe until at least the first ACK. */ + newtp->app_limited = ~0U; + + tcp_init_xmit_timers(newsk); + newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; + + newtp->rx_opt.saw_tstamp = 0; + + newtp->rx_opt.dsack = 0; + newtp->rx_opt.num_sacks = 0; + + newtp->urg_data = 0; + + if (sock_flag(newsk, SOCK_KEEPOPEN)) + inet_csk_reset_keepalive_timer(newsk, + keepalive_time_when(newtp)); + + newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; + newtp->rx_opt.sack_ok = ireq->sack_ok; + newtp->window_clamp = req->rsk_window_clamp; + newtp->rcv_ssthresh = req->rsk_rcv_wnd; + newtp->rcv_wnd = req->rsk_rcv_wnd; + newtp->rx_opt.wscale_ok = ireq->wscale_ok; + if (newtp->rx_opt.wscale_ok) { + newtp->rx_opt.snd_wscale = ireq->snd_wscale; + newtp->rx_opt.rcv_wscale = ireq->rcv_wscale; + } else { + newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; + newtp->window_clamp = min(newtp->window_clamp, 65535U); + } + newtp->snd_wnd = ntohs(tcp_hdr(skb)->window) << newtp->rx_opt.snd_wscale; + newtp->max_window = newtp->snd_wnd; + + if (newtp->rx_opt.tstamp_ok) { + newtp->rx_opt.ts_recent = req->ts_recent; + newtp->rx_opt.ts_recent_stamp = get_seconds(); + newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + } else { + newtp->rx_opt.ts_recent_stamp = 0; + newtp->tcp_header_len = sizeof(struct tcphdr); + } + newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG - newtp->md5sig_info = NULL; /*XXX*/ - if (newtp->af_specific->md5_lookup(sk, newsk)) - newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; + newtp->md5sig_info = NULL; /*XXX*/ + if (newtp->af_specific->md5_lookup(sk, newsk)) + newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif - if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) - newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; - newtp->rx_opt.mss_clamp = req->mss; - tcp_ecn_openreq_child(newtp, req); - newtp->fastopen_req = NULL; - newtp->fastopen_rsk = NULL; - newtp->syn_data_acked = 0; - newtp->rack.mstamp = 0; - newtp->rack.advanced = 0; - newtp->rack.reo_wnd_steps = 1; - newtp->rack.last_delivered = 0; - newtp->rack.reo_wnd_persist = 0; - newtp->rack.dsack_seen = 0; - - __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); - } + if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) + newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; + newtp->rx_opt.mss_clamp = req->mss; + tcp_ecn_openreq_child(newtp, req); + newtp->fastopen_req = NULL; + newtp->fastopen_rsk = NULL; + newtp->syn_data_acked = 0; + newtp->rack.mstamp = 0; + newtp->rack.advanced = 0; + newtp->rack.reo_wnd_steps = 1; + newtp->rack.last_delivered = 0; + newtp->rack.reo_wnd_persist = 0; + newtp->rack.dsack_seen = 0; + + __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); + return newsk; } EXPORT_SYMBOL(tcp_create_openreq_child); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 8cc7c3487330..f5aee641f825 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -180,9 +180,9 @@ out: return segs; } -struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) { - struct sk_buff **pp = NULL; + struct sk_buff *pp = NULL; struct sk_buff *p; struct tcphdr *th; struct tcphdr *th2; @@ -220,7 +220,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) len = skb_gro_len(skb); flags = tcp_flag_word(th); - for (; (p = *head); head = &p->next) { + list_for_each_entry(p, head, list) { if (!NAPI_GRO_CB(p)->same_flow) continue; @@ -233,7 +233,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) goto found; } - + p = NULL; goto out_check_final; found: @@ -263,7 +263,7 @@ found: flush |= (len - 1) >= mss; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); - if (flush || skb_gro_receive(head, skb)) { + if (flush || skb_gro_receive(p, skb)) { mss = 1; goto out_check_final; } @@ -277,7 +277,7 @@ out_check_final: TCP_FLAG_FIN)); if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) - pp = head; + pp = p; out: NAPI_GRO_CB(skb)->flush |= (flush != 0); @@ -302,7 +302,7 @@ int tcp_gro_complete(struct sk_buff *skb) } EXPORT_SYMBOL(tcp_gro_complete); -static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb) { /* Don't bother verifying checksum if we're going to flush anyway. */ if (!NAPI_GRO_CB(skb)->flush && diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8e08b409c71e..f8f6129160dd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -973,17 +973,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer) return HRTIMER_NORESTART; } -/* BBR congestion control needs pacing. - * Same remark for SO_MAX_PACING_RATE. - * sch_fq packet scheduler is efficiently handling pacing, - * but is not always installed/used. - * Return true if TCP stack should pace packets itself. - */ -static bool tcp_needs_internal_pacing(const struct sock *sk) -{ - return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED; -} - static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) { u64 len_ns; @@ -995,9 +984,6 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) if (!rate || rate == ~0U) return; - /* Should account for header sizes as sch_fq does, - * but lets make things simple. - */ len_ns = (u64)skb->len * NSEC_PER_SEC; do_div(len_ns, rate); hrtimer_start(&tcp_sk(sk)->pacing_timer, diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 69c54540d5b4..0c0522b79b43 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -343,10 +343,11 @@ out: return segs; } -struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, - struct udphdr *uh, udp_lookup_t lookup) +struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct udphdr *uh, udp_lookup_t lookup) { - struct sk_buff *p, **pp = NULL; + struct sk_buff *pp = NULL; + struct sk_buff *p; struct udphdr *uh2; unsigned int off = skb_gro_offset(skb); int flush = 1; @@ -371,7 +372,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, unflush: flush = 0; - for (p = *head; p; p = p->next) { + list_for_each_entry(p, head, list) { if (!NAPI_GRO_CB(p)->same_flow) continue; @@ -399,8 +400,8 @@ out: } EXPORT_SYMBOL(udp_gro_receive); -static struct sk_buff **udp4_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *udp4_gro_receive(struct list_head *head, + struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 27f59b61f70f..ddfa533a84e5 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -49,8 +49,8 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen) return 0; } -static struct sk_buff **esp6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *esp6_gro_receive(struct list_head *head, + struct sk_buff *skb) { int offset = skb_gro_offset(skb); struct xfrm_offload *xo; diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile index 4b32e5921e5c..b7739aba6e68 100644 --- a/net/ipv6/ila/Makefile +++ b/net/ipv6/ila/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IPV6_ILA) += ila.o -ila-objs := ila_common.o ila_lwt.o ila_xlat.o +ila-objs := ila_main.o ila_common.o ila_lwt.o ila_xlat.o diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index 3c7a11b62334..1f747bcbec29 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -19,6 +19,7 @@ #include <linux/skbuff.h> #include <linux/types.h> #include <net/checksum.h> +#include <net/genetlink.h> #include <net/ip.h> #include <net/protocol.h> #include <uapi/linux/ila.h> @@ -104,9 +105,31 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, void ila_init_saved_csum(struct ila_params *p); +struct ila_net { + struct { + struct rhashtable rhash_table; + spinlock_t *locks; /* Bucket locks for entry manipulation */ + unsigned int locks_mask; + bool hooks_registered; + } xlat; +}; + int ila_lwt_init(void); void ila_lwt_fini(void); -int ila_xlat_init(void); -void ila_xlat_fini(void); + +int ila_xlat_init_net(struct net *net); +void ila_xlat_exit_net(struct net *net); + +int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info); +int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info); +int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info); +int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info); +int ila_xlat_nl_dump_start(struct netlink_callback *cb); +int ila_xlat_nl_dump_done(struct netlink_callback *cb); +int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb); + +extern unsigned int ila_net_id; + +extern struct genl_family ila_nl_family; #endif /* __ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 8c88ecf29b93..579310466eac 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -154,33 +154,3 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, iaddr->loc = p->locator; } -static int __init ila_init(void) -{ - int ret; - - ret = ila_lwt_init(); - - if (ret) - goto fail_lwt; - - ret = ila_xlat_init(); - if (ret) - goto fail_xlat; - - return 0; -fail_xlat: - ila_lwt_fini(); -fail_lwt: - return ret; -} - -static void __exit ila_fini(void) -{ - ila_xlat_fini(); - ila_lwt_fini(); -} - -module_init(ila_init); -module_exit(ila_fini); -MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); -MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c new file mode 100644 index 000000000000..18fac76b9520 --- /dev/null +++ b/net/ipv6/ila/ila_main.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <net/genetlink.h> +#include <net/ila.h> +#include <net/netns/generic.h> +#include <uapi/linux/genetlink.h> +#include "ila.h" + +static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, + [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, + [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, + [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, +}; + +static const struct genl_ops ila_nl_ops[] = { + { + .cmd = ILA_CMD_ADD, + .doit = ila_xlat_nl_cmd_add_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_DEL, + .doit = ila_xlat_nl_cmd_del_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_FLUSH, + .doit = ila_xlat_nl_cmd_flush, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_GET, + .doit = ila_xlat_nl_cmd_get_mapping, + .start = ila_xlat_nl_dump_start, + .dumpit = ila_xlat_nl_dump, + .done = ila_xlat_nl_dump_done, + .policy = ila_nl_policy, + }, +}; + +unsigned int ila_net_id; + +struct genl_family ila_nl_family __ro_after_init = { + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .ops = ila_nl_ops, + .n_ops = ARRAY_SIZE(ila_nl_ops), +}; + +static __net_init int ila_init_net(struct net *net) +{ + int err; + + err = ila_xlat_init_net(net); + if (err) + goto ila_xlat_init_fail; + + return 0; + +ila_xlat_init_fail: + return err; +} + +static __net_exit void ila_exit_net(struct net *net) +{ + ila_xlat_exit_net(net); +} + +static struct pernet_operations ila_net_ops = { + .init = ila_init_net, + .exit = ila_exit_net, + .id = &ila_net_id, + .size = sizeof(struct ila_net), +}; + +static int __init ila_init(void) +{ + int ret; + + ret = register_pernet_device(&ila_net_ops); + if (ret) + goto register_device_fail; + + ret = genl_register_family(&ila_nl_family); + if (ret) + goto register_family_fail; + + ret = ila_lwt_init(); + if (ret) + goto fail_lwt; + + return 0; + +fail_lwt: + genl_unregister_family(&ila_nl_family); +register_family_fail: + unregister_pernet_device(&ila_net_ops); +register_device_fail: + return ret; +} + +static void __exit ila_fini(void) +{ + ila_lwt_fini(); + genl_unregister_family(&ila_nl_family); + unregister_pernet_device(&ila_net_ops); +} + +module_init(ila_init); +module_exit(ila_fini); +MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 10ae13560b40..51a15ce50a64 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -22,36 +22,14 @@ struct ila_map { struct rcu_head rcu; }; -static unsigned int ila_net_id; - -struct ila_net { - struct rhashtable rhash_table; - spinlock_t *locks; /* Bucket locks for entry manipulation */ - unsigned int locks_mask; - bool hooks_registered; -}; - +#define MAX_LOCKS 1024 #define LOCKS_PER_CPU 10 static int alloc_ila_locks(struct ila_net *ilan) { - unsigned int i, size; - unsigned int nr_pcpus = num_possible_cpus(); - - nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); - size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); - - if (sizeof(spinlock_t) != 0) { - ilan->locks = kvmalloc_array(size, sizeof(spinlock_t), - GFP_KERNEL); - if (!ilan->locks) - return -ENOMEM; - for (i = 0; i < size; i++) - spin_lock_init(&ilan->locks[i]); - } - ilan->locks_mask = size - 1; - - return 0; + return alloc_bucket_spinlocks(&ilan->xlat.locks, &ilan->xlat.locks_mask, + MAX_LOCKS, LOCKS_PER_CPU, + GFP_KERNEL); } static u32 hashrnd __read_mostly; @@ -71,7 +49,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc) static inline spinlock_t *ila_get_lock(struct ila_net *ilan, struct ila_locator loc) { - return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask]; + return &ilan->xlat.locks[ila_locator_hash(loc) & ilan->xlat.locks_mask]; } static inline int ila_cmp_wildcards(struct ila_map *ila, @@ -115,16 +93,6 @@ static const struct rhashtable_params rht_params = { .obj_cmpfn = ila_cmpfn, }; -static struct genl_family ila_nl_family; - -static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { - [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, - [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, - [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, - [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, - [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, -}; - static int parse_nl_config(struct genl_info *info, struct ila_xlat_params *xp) { @@ -162,7 +130,7 @@ static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr, { struct ila_map *ila; - ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc, + ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &iaddr->loc, rht_params); while (ila) { if (!ila_cmp_wildcards(ila, iaddr, ifindex)) @@ -179,7 +147,7 @@ static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp, { struct ila_map *ila; - ila = rhashtable_lookup_fast(&ilan->rhash_table, + ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); while (ila) { @@ -196,9 +164,9 @@ static inline void ila_release(struct ila_map *ila) kfree_rcu(ila, rcu); } -static void ila_free_cb(void *ptr, void *arg) +static void ila_free_node(struct ila_map *ila) { - struct ila_map *ila = (struct ila_map *)ptr, *next; + struct ila_map *next; /* Assume rcu_readlock held */ while (ila) { @@ -208,6 +176,11 @@ static void ila_free_cb(void *ptr, void *arg) } } +static void ila_free_cb(void *ptr, void *arg) +{ + ila_free_node((struct ila_map *)ptr); +} + static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila); static unsigned int @@ -235,7 +208,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = 0, order; - if (!ilan->hooks_registered) { + if (!ilan->xlat.hooks_registered) { /* We defer registering net hooks in the namespace until the * first mapping is added. */ @@ -244,7 +217,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) if (err) return err; - ilan->hooks_registered = true; + ilan->xlat.hooks_registered = true; } ila = kzalloc(sizeof(*ila), GFP_KERNEL); @@ -259,12 +232,12 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) spin_lock(lock); - head = rhashtable_lookup_fast(&ilan->rhash_table, + head = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); if (!head) { /* New entry for the rhash_table */ - err = rhashtable_lookup_insert_fast(&ilan->rhash_table, + err = rhashtable_lookup_insert_fast(&ilan->xlat.rhash_table, &ila->node, rht_params); } else { struct ila_map *tila = head, *prev = NULL; @@ -290,7 +263,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) } else { /* Make this ila new head */ RCU_INIT_POINTER(ila->next, head); - err = rhashtable_replace_fast(&ilan->rhash_table, + err = rhashtable_replace_fast(&ilan->xlat.rhash_table, &head->node, &ila->node, rht_params); if (err) @@ -316,7 +289,7 @@ static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp) spin_lock(lock); - head = rhashtable_lookup_fast(&ilan->rhash_table, + head = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); ila = head; @@ -346,15 +319,15 @@ static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp) * table */ err = rhashtable_replace_fast( - &ilan->rhash_table, &ila->node, + &ilan->xlat.rhash_table, &ila->node, &head->node, rht_params); if (err) goto out; } else { /* Entry no longer used */ - err = rhashtable_remove_fast(&ilan->rhash_table, - &ila->node, - rht_params); + err = rhashtable_remove_fast( + &ilan->xlat.rhash_table, + &ila->node, rht_params); } } @@ -369,7 +342,7 @@ out: return err; } -static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) +int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_xlat_params p; @@ -382,7 +355,7 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) return ila_add_mapping(net, &p); } -static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) +int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_xlat_params xp; @@ -397,6 +370,59 @@ static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) return 0; } +static inline spinlock_t *lock_from_ila_map(struct ila_net *ilan, + struct ila_map *ila) +{ + return ila_get_lock(ilan, ila->xp.ip.locator_match); +} + +int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct rhashtable_iter iter; + struct ila_map *ila; + spinlock_t *lock; + int ret; + + ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL); + if (ret) + goto done; + + rhashtable_walk_start(&iter); + + for (;;) { + ila = rhashtable_walk_next(&iter); + + if (IS_ERR(ila)) { + if (PTR_ERR(ila) == -EAGAIN) + continue; + ret = PTR_ERR(ila); + goto done; + } else if (!ila) { + break; + } + + lock = lock_from_ila_map(ilan, ila); + + spin_lock(lock); + + ret = rhashtable_remove_fast(&ilan->xlat.rhash_table, + &ila->node, rht_params); + if (!ret) + ila_free_node(ila); + + spin_unlock(lock); + + if (ret) + break; + } + +done: + rhashtable_walk_stop(&iter); + return ret; +} + static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) { if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR, @@ -434,7 +460,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) +int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_net *ilan = net_generic(net, ila_net_id); @@ -475,27 +501,34 @@ out_free: struct ila_dump_iter { struct rhashtable_iter rhiter; + int skip; }; -static int ila_nl_dump_start(struct netlink_callback *cb) +int ila_xlat_nl_dump_start(struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct ila_net *ilan = net_generic(net, ila_net_id); - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; + struct ila_dump_iter *iter; + int ret; - if (!iter) { - iter = kmalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return -ENOMEM; + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; - cb->args[0] = (long)iter; + ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter, + GFP_KERNEL); + if (ret) { + kfree(iter); + return ret; } - return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter, - GFP_KERNEL); + iter->skip = 0; + cb->args[0] = (long)iter; + + return ret; } -static int ila_nl_dump_done(struct netlink_callback *cb) +int ila_xlat_nl_dump_done(struct netlink_callback *cb) { struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; @@ -506,24 +539,49 @@ static int ila_nl_dump_done(struct netlink_callback *cb) return 0; } -static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) +int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; struct rhashtable_iter *rhiter = &iter->rhiter; + int skip = iter->skip; struct ila_map *ila; int ret; rhashtable_walk_start(rhiter); - for (;;) { - ila = rhashtable_walk_next(rhiter); + /* Get first entry */ + ila = rhashtable_walk_peek(rhiter); + + if (ila && !IS_ERR(ila) && skip) { + /* Skip over visited entries */ + while (ila && skip) { + /* Skip over any ila entries in this list that we + * have already dumped. + */ + ila = rcu_access_pointer(ila->next); + skip--; + } + } + + skip = 0; + + for (;;) { if (IS_ERR(ila)) { - if (PTR_ERR(ila) == -EAGAIN) - continue; ret = PTR_ERR(ila); - goto done; + if (ret == -EAGAIN) { + /* Table has changed and iter has reset. Return + * -EAGAIN to the application even if we have + * written data to the skb. The application + * needs to deal with this. + */ + + goto out_ret; + } else { + break; + } } else if (!ila) { + ret = 0; break; } @@ -532,90 +590,54 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, ILA_CMD_GET); if (ret) - goto done; + goto out; + skip++; ila = rcu_access_pointer(ila->next); } + + skip = 0; + ila = rhashtable_walk_next(rhiter); } - ret = skb->len; +out: + iter->skip = skip; + ret = (skb->len ? : ret); -done: +out_ret: rhashtable_walk_stop(rhiter); return ret; } -static const struct genl_ops ila_nl_ops[] = { - { - .cmd = ILA_CMD_ADD, - .doit = ila_nl_cmd_add_mapping, - .policy = ila_nl_policy, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = ILA_CMD_DEL, - .doit = ila_nl_cmd_del_mapping, - .policy = ila_nl_policy, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = ILA_CMD_GET, - .doit = ila_nl_cmd_get_mapping, - .start = ila_nl_dump_start, - .dumpit = ila_nl_dump, - .done = ila_nl_dump_done, - .policy = ila_nl_policy, - }, -}; - -static struct genl_family ila_nl_family __ro_after_init = { - .hdrsize = 0, - .name = ILA_GENL_NAME, - .version = ILA_GENL_VERSION, - .maxattr = ILA_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, - .module = THIS_MODULE, - .ops = ila_nl_ops, - .n_ops = ARRAY_SIZE(ila_nl_ops), -}; - #define ILA_HASH_TABLE_SIZE 1024 -static __net_init int ila_init_net(struct net *net) +int ila_xlat_init_net(struct net *net) { - int err; struct ila_net *ilan = net_generic(net, ila_net_id); + int err; err = alloc_ila_locks(ilan); if (err) return err; - rhashtable_init(&ilan->rhash_table, &rht_params); + rhashtable_init(&ilan->xlat.rhash_table, &rht_params); return 0; } -static __net_exit void ila_exit_net(struct net *net) +void ila_xlat_exit_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); - rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL); + rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL); - kvfree(ilan->locks); + free_bucket_spinlocks(ilan->xlat.locks); - if (ilan->hooks_registered) + if (ilan->xlat.hooks_registered) nf_unregister_net_hooks(net, ila_nf_hook_ops, ARRAY_SIZE(ila_nf_hook_ops)); } -static struct pernet_operations ila_net_ops = { - .init = ila_init_net, - .exit = ila_exit_net, - .id = &ila_net_id, - .size = sizeof(struct ila_net), -}; - static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) { struct ila_map *ila; @@ -642,28 +664,3 @@ static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) return 0; } -int __init ila_xlat_init(void) -{ - int ret; - - ret = register_pernet_device(&ila_net_ops); - if (ret) - goto exit; - - ret = genl_register_family(&ila_nl_family); - if (ret < 0) - goto unregister; - - return 0; - -unregister: - unregister_pernet_device(&ila_net_ops); -exit: - return ret; -} - -void ila_xlat_fini(void) -{ - genl_unregister_family(&ila_nl_family); - unregister_pernet_device(&ila_net_ops); -} diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c8cf2fdbb13b..367177786e34 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -990,6 +990,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); dsfield = key->tos; + if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) + goto tx_err; md = ip_tunnel_info_opts(tun_info); if (!md) goto tx_err; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 5b3f2f89ef41..37ff4805b20c 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -163,11 +163,11 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph, return len; } -static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *ipv6_gro_receive(struct list_head *head, + struct sk_buff *skb) { const struct net_offload *ops; - struct sk_buff **pp = NULL; + struct sk_buff *pp = NULL; struct sk_buff *p; struct ipv6hdr *iph; unsigned int nlen; @@ -214,7 +214,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, flush--; nlen = skb_network_header_len(skb); - for (p = *head; p; p = p->next) { + list_for_each_entry(p, head, list) { const struct ipv6hdr *iph2; __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */ @@ -263,8 +263,8 @@ out: return pp; } -static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head, + struct sk_buff *skb) { /* Common GRO receive for SIT and IP6IP6 */ @@ -278,8 +278,8 @@ static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head, return ipv6_gro_receive(head, skb); } -static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *ip4ip6_gro_receive(struct list_head *head, + struct sk_buff *skb) { /* Common GRO receive for SIT and IP6IP6 */ diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0d0f0053bb11..d0b7e0249c13 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -32,6 +32,7 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <linux/compat.h> +#include <linux/rhashtable.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/raw.h> diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index b397a8fe88b9..c6bf580d0f33 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -36,7 +36,7 @@ static const struct nf_loginfo default_loginfo = { }; /* One level of recursion won't kill us */ -static void dump_ipv6_packet(struct nf_log_buf *m, +static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int ip6hoff, int recurse) @@ -258,7 +258,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, /* Max length: 3+maxlen */ if (recurse) { nf_log_buf_add(m, "["); - dump_ipv6_packet(m, info, skb, + dump_ipv6_packet(net, m, info, skb, ptr + sizeof(_icmp6h), 0); nf_log_buf_add(m, "] "); } @@ -278,7 +278,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, /* Max length: 15 "UID=4294967295 " */ if ((logflags & NF_LOG_UID) && recurse) - nf_log_dump_sk_uid_gid(m, skb->sk); + nf_log_dump_sk_uid_gid(net, m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (recurse && skb->mark) @@ -365,7 +365,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, if (in != NULL) dump_ipv6_mac_header(m, loginfo, skb); - dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1); + dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1); nf_log_buf_close(m); } diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 0fdf2a55e746..8d0ba757a46c 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -17,6 +17,7 @@ #include <linux/net.h> #include <linux/in6.h> #include <linux/slab.h> +#include <linux/rhashtable.h> #include <net/ipv6.h> #include <net/protocol.h> diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 558fe8cc6d43..8546f94f30d4 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -22,6 +22,7 @@ #include <linux/icmpv6.h> #include <linux/mroute6.h> #include <linux/slab.h> +#include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 278e49cd67d4..e72947c99454 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -15,8 +15,8 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" -static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *tcp6_gro_receive(struct list_head *head, + struct sk_buff *skb) { /* Don't bother verifying checksum if we're going to flush anyway. */ if (!NAPI_GRO_CB(skb)->flush && diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 03a2ff3fe1e6..95dee9ca8d22 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -114,8 +114,8 @@ out: return segs; } -static struct sk_buff **udp6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff *udp6_gro_receive(struct list_head *head, + struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 40261cb68e83..1ea285bad84b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -322,8 +322,7 @@ int l2tp_session_register(struct l2tp_session *session, if (tunnel->version == L2TP_HDR_VER_3) { pn = l2tp_pernet(tunnel->l2tp_net); - g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net), - session->session_id); + g_head = l2tp_session_id_hash_2(pn, session->session_id); spin_lock_bh(&pn->l2tp_session_hlist_lock); @@ -783,7 +782,7 @@ EXPORT_SYMBOL(l2tp_recv_common); /* Drop skbs from the session's reorder_q */ -int l2tp_session_queue_purge(struct l2tp_session *session) +static int l2tp_session_queue_purge(struct l2tp_session *session) { struct sk_buff *skb = NULL; BUG_ON(!session); @@ -794,7 +793,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session) } return 0; } -EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame * here. The skb is not on a list when we get here. @@ -1009,8 +1007,8 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) return bufp - optr; } -static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, - struct flowi *fl, size_t data_len) +static void l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, + struct flowi *fl, size_t data_len) { struct l2tp_tunnel *tunnel = session->tunnel; unsigned int len = skb->len; @@ -1052,8 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, atomic_long_inc(&tunnel->stats.tx_errors); atomic_long_inc(&session->stats.tx_errors); } - - return 0; } /* If caller requires the skb to have a ppp header, the header must be @@ -1193,7 +1189,7 @@ end: /* When the tunnel is closed, all the attached sessions need to go too. */ -void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { int hash; struct hlist_node *walk; @@ -1242,7 +1238,6 @@ again: } write_unlock_bh(&tunnel->hlist_lock); } -EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index c199020f8a8a..a5c09d3a5698 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -180,9 +180,6 @@ struct l2tp_tunnel { struct net *l2tp_net; /* the net we belong to */ refcount_t ref_count; -#ifdef CONFIG_DEBUG_FS - void (*show)(struct seq_file *m, void *arg); -#endif int (*recv_payload_hook)(struct sk_buff *skb); void (*old_sk_destruct)(struct sock *); struct sock *sock; /* Parent socket */ @@ -190,8 +187,6 @@ struct l2tp_tunnel { * was created by userspace */ struct work_struct del_work; - - uint8_t priv[0]; /* private data */ }; struct l2tp_nl_cmd_ops { @@ -201,11 +196,6 @@ struct l2tp_nl_cmd_ops { int (*session_delete)(struct l2tp_session *session); }; -static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel) -{ - return &tunnel->priv[0]; -} - static inline void *l2tp_session_priv(struct l2tp_session *session) { return &session->priv[0]; @@ -229,7 +219,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, struct l2tp_tunnel_cfg *cfg); -void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, @@ -244,7 +233,6 @@ void l2tp_session_free(struct l2tp_session *session); void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); -int l2tp_session_queue_purge(struct l2tp_session *session); int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); void l2tp_session_set_header_len(struct l2tp_session *session, int version); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index e87686f7d63c..b5d7dde003ef 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -177,9 +177,6 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) atomic_long_read(&tunnel->stats.rx_packets), atomic_long_read(&tunnel->stats.rx_bytes), atomic_long_read(&tunnel->stats.rx_errors)); - - if (tunnel->show != NULL) - tunnel->show(m, tunnel); } static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e398797878a9..9ac02c93df98 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -424,12 +424,6 @@ static void pppol2tp_put_sk(struct rcu_head *head) sock_put(ps->__sk); } -/* Called by l2tp_core when a session socket is being closed. - */ -static void pppol2tp_session_close(struct l2tp_session *session) -{ -} - /* Really kill the session socket. (Called from sock_put() if * refcnt == 0.) */ @@ -573,7 +567,6 @@ static void pppol2tp_session_init(struct l2tp_session *session) struct dst_entry *dst; session->recv_skb = pppol2tp_recv; - session->session_close = pppol2tp_session_close; #if IS_ENABLED(CONFIG_L2TP_DEBUGFS) session->show = pppol2tp_show; #endif @@ -595,40 +588,113 @@ static void pppol2tp_session_init(struct l2tp_session *session) } } +struct l2tp_connect_info { + u8 version; + int fd; + u32 tunnel_id; + u32 peer_tunnel_id; + u32 session_id; + u32 peer_session_id; +}; + +static int pppol2tp_sockaddr_get_info(const void *sa, int sa_len, + struct l2tp_connect_info *info) +{ + switch (sa_len) { + case sizeof(struct sockaddr_pppol2tp): + { + const struct sockaddr_pppol2tp *sa_v2in4 = sa; + + if (sa_v2in4->sa_protocol != PX_PROTO_OL2TP) + return -EINVAL; + + info->version = 2; + info->fd = sa_v2in4->pppol2tp.fd; + info->tunnel_id = sa_v2in4->pppol2tp.s_tunnel; + info->peer_tunnel_id = sa_v2in4->pppol2tp.d_tunnel; + info->session_id = sa_v2in4->pppol2tp.s_session; + info->peer_session_id = sa_v2in4->pppol2tp.d_session; + + break; + } + case sizeof(struct sockaddr_pppol2tpv3): + { + const struct sockaddr_pppol2tpv3 *sa_v3in4 = sa; + + if (sa_v3in4->sa_protocol != PX_PROTO_OL2TP) + return -EINVAL; + + info->version = 3; + info->fd = sa_v3in4->pppol2tp.fd; + info->tunnel_id = sa_v3in4->pppol2tp.s_tunnel; + info->peer_tunnel_id = sa_v3in4->pppol2tp.d_tunnel; + info->session_id = sa_v3in4->pppol2tp.s_session; + info->peer_session_id = sa_v3in4->pppol2tp.d_session; + + break; + } + case sizeof(struct sockaddr_pppol2tpin6): + { + const struct sockaddr_pppol2tpin6 *sa_v2in6 = sa; + + if (sa_v2in6->sa_protocol != PX_PROTO_OL2TP) + return -EINVAL; + + info->version = 2; + info->fd = sa_v2in6->pppol2tp.fd; + info->tunnel_id = sa_v2in6->pppol2tp.s_tunnel; + info->peer_tunnel_id = sa_v2in6->pppol2tp.d_tunnel; + info->session_id = sa_v2in6->pppol2tp.s_session; + info->peer_session_id = sa_v2in6->pppol2tp.d_session; + + break; + } + case sizeof(struct sockaddr_pppol2tpv3in6): + { + const struct sockaddr_pppol2tpv3in6 *sa_v3in6 = sa; + + if (sa_v3in6->sa_protocol != PX_PROTO_OL2TP) + return -EINVAL; + + info->version = 3; + info->fd = sa_v3in6->pppol2tp.fd; + info->tunnel_id = sa_v3in6->pppol2tp.s_tunnel; + info->peer_tunnel_id = sa_v3in6->pppol2tp.d_tunnel; + info->session_id = sa_v3in6->pppol2tp.s_session; + info->peer_session_id = sa_v3in6->pppol2tp.d_session; + + break; + } + default: + return -EINVAL; + } + + return 0; +} + /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket */ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, int sockaddr_len, int flags) { struct sock *sk = sock->sk; - struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr; struct pppox_sock *po = pppox_sk(sk); struct l2tp_session *session = NULL; + struct l2tp_connect_info info; struct l2tp_tunnel *tunnel; struct pppol2tp_session *ps; struct l2tp_session_cfg cfg = { 0, }; - int error = 0; - u32 tunnel_id, peer_tunnel_id; - u32 session_id, peer_session_id; bool drop_refcnt = false; bool drop_tunnel = false; bool new_session = false; bool new_tunnel = false; - int ver = 2; - int fd; - - lock_sock(sk); - - error = -EINVAL; + int error; - if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) && - sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) && - sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) && - sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6)) - goto end; + error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info); + if (error < 0) + return error; - if (sp->sa_protocol != PX_PROTO_OL2TP) - goto end; + lock_sock(sk); /* Check for already bound sockets */ error = -EBUSY; @@ -640,56 +706,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (sk->sk_user_data) goto end; /* socket is already attached */ - /* Get params from socket address. Handle L2TPv2 and L2TPv3. - * This is nasty because there are different sockaddr_pppol2tp - * structs for L2TPv2, L2TPv3, over IPv4 and IPv6. We use - * the sockaddr size to determine which structure the caller - * is using. - */ - peer_tunnel_id = 0; - if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) { - fd = sp->pppol2tp.fd; - tunnel_id = sp->pppol2tp.s_tunnel; - peer_tunnel_id = sp->pppol2tp.d_tunnel; - session_id = sp->pppol2tp.s_session; - peer_session_id = sp->pppol2tp.d_session; - } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) { - struct sockaddr_pppol2tpv3 *sp3 = - (struct sockaddr_pppol2tpv3 *) sp; - ver = 3; - fd = sp3->pppol2tp.fd; - tunnel_id = sp3->pppol2tp.s_tunnel; - peer_tunnel_id = sp3->pppol2tp.d_tunnel; - session_id = sp3->pppol2tp.s_session; - peer_session_id = sp3->pppol2tp.d_session; - } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpin6)) { - struct sockaddr_pppol2tpin6 *sp6 = - (struct sockaddr_pppol2tpin6 *) sp; - fd = sp6->pppol2tp.fd; - tunnel_id = sp6->pppol2tp.s_tunnel; - peer_tunnel_id = sp6->pppol2tp.d_tunnel; - session_id = sp6->pppol2tp.s_session; - peer_session_id = sp6->pppol2tp.d_session; - } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3in6)) { - struct sockaddr_pppol2tpv3in6 *sp6 = - (struct sockaddr_pppol2tpv3in6 *) sp; - ver = 3; - fd = sp6->pppol2tp.fd; - tunnel_id = sp6->pppol2tp.s_tunnel; - peer_tunnel_id = sp6->pppol2tp.d_tunnel; - session_id = sp6->pppol2tp.s_session; - peer_session_id = sp6->pppol2tp.d_session; - } else { - error = -EINVAL; - goto end; /* bad socket address */ - } - /* Don't bind if tunnel_id is 0 */ error = -EINVAL; - if (tunnel_id == 0) + if (!info.tunnel_id) goto end; - tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id); + tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id); if (tunnel) drop_tunnel = true; @@ -697,7 +719,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, * peer_session_id is 0. Otherwise look up tunnel using supplied * tunnel id. */ - if ((session_id == 0) && (peer_session_id == 0)) { + if (!info.session_id && !info.peer_session_id) { if (tunnel == NULL) { struct l2tp_tunnel_cfg tcfg = { .encap = L2TP_ENCAPTYPE_UDP, @@ -707,12 +729,16 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, /* Prevent l2tp_tunnel_register() from trying to set up * a kernel socket. */ - if (fd < 0) { + if (info.fd < 0) { error = -EBADF; goto end; } - error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel); + error = l2tp_tunnel_create(sock_net(sk), info.fd, + info.version, + info.tunnel_id, + info.peer_tunnel_id, &tcfg, + &tunnel); if (error < 0) goto end; @@ -741,9 +767,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, tunnel->recv_payload_hook = pppol2tp_recv_payload_hook; if (tunnel->peer_tunnel_id == 0) - tunnel->peer_tunnel_id = peer_tunnel_id; + tunnel->peer_tunnel_id = info.peer_tunnel_id; - session = l2tp_session_get(sock_net(sk), tunnel, session_id); + session = l2tp_session_get(sock_net(sk), tunnel, info.session_id); if (session) { drop_refcnt = true; @@ -772,8 +798,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, cfg.pw_type = L2TP_PWTYPE_PPP; session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, session_id, - peer_session_id, &cfg); + tunnel, info.session_id, + info.peer_session_id, &cfg); if (IS_ERR(session)) { error = PTR_ERR(session); goto end; diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index e3589ade62e0..bb707789ef2b 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -12,6 +12,7 @@ mac80211-y := \ scan.o offchannel.o \ ht.o agg-tx.o agg-rx.o \ vht.o \ + he.o \ ibss.o \ iface.o \ rate.o \ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index e83c19d4c292..6a4f154c99f6 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -245,6 +245,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, }; int i, ret = -EOPNOTSUPP; u16 status = WLAN_STATUS_REQUEST_DECLINED; + u16 max_buf_size; if (tid >= IEEE80211_FIRST_TSPEC_TSID) { ht_dbg(sta->sdata, @@ -268,13 +269,18 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } + if (sta->sta.he_cap.has_he) + max_buf_size = IEEE80211_MAX_AMPDU_BUF; + else + max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT; + /* sanity check for incoming parameters: * check if configuration can support the BA policy * and if buffer size does not exceeds max value */ /* XXX: check own ht delayed BA capability?? */ if (((ba_policy != 1) && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || - (buf_size > IEEE80211_MAX_AMPDU_BUF)) { + (buf_size > max_buf_size)) { status = WLAN_STATUS_INVALID_QOS_PARAM; ht_dbg_ratelimited(sta->sdata, "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n", @@ -283,7 +289,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, } /* determine default buffer size */ if (buf_size == 0) - buf_size = IEEE80211_MAX_AMPDU_BUF; + buf_size = max_buf_size; /* make sure the size doesn't exceed the maximum supported by the hw */ if (buf_size > sta->sta.max_rx_aggregation_subframes) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index ac4295296514..69e831bc317b 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -463,6 +463,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) .timeout = 0, }; int ret; + u16 buf_size; tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -511,11 +512,22 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) sta->ampdu_mlme.addba_req_num[tid]++; spin_unlock_bh(&sta->lock); + if (sta->sta.he_cap.has_he) { + buf_size = local->hw.max_tx_aggregation_subframes; + } else { + /* + * We really should use what the driver told us it will + * transmit as the maximum, but certain APs (e.g. the + * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012) + * will crash when we use a lower number. + */ + buf_size = IEEE80211_MAX_AMPDU_BUF_HT; + } + /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, tid_tx->dialog_token, params.ssn, - IEEE80211_MAX_AMPDU_BUF, - tid_tx->timeout); + buf_size, tid_tx->timeout); } /* @@ -905,8 +917,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, { struct tid_ampdu_tx *tid_tx; struct ieee80211_txq *txq; - u16 capab, tid; - u8 buf_size; + u16 capab, tid, buf_size; bool amsdu; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bdf6fa78d0d2..02f3672e7b5e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1412,6 +1412,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, params->vht_capa, sta); + if (params->he_capa) + ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, + (void *)params->he_capa, + params->he_capa_len, sta); + if (params->opmode_notif_used) { /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it @@ -3486,7 +3491,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, } local_bh_disable(); - ieee80211_xmit(sdata, sta, skb); + ieee80211_xmit(sdata, sta, skb, 0); local_bh_enable(); ret = 0; diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 690c142a7a44..5ac743816b59 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -116,16 +116,16 @@ static void ieee80211_get_stats(struct net_device *dev, data[i++] = sta->sta_state; - if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) + if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) data[i] = 100000ULL * cfg80211_calculate_bitrate(&sinfo.txrate); i++; - if (sinfo.filled & BIT(NL80211_STA_INFO_RX_BITRATE)) + if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) data[i] = 100000ULL * cfg80211_calculate_bitrate(&sinfo.rxrate); i++; - if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL_AVG)) + if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG)) data[i] = (u8)sinfo.signal_avg; i++; } else { diff --git a/net/mac80211/he.c b/net/mac80211/he.c new file mode 100644 index 000000000000..769078ed5a12 --- /dev/null +++ b/net/mac80211/he.c @@ -0,0 +1,55 @@ +/* + * HE handling + * + * Copyright(c) 2017 Intel Deutschland GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "ieee80211_i.h" + +void +ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const u8 *he_cap_ie, u8 he_cap_len, + struct sta_info *sta) +{ + struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; + struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; + u8 he_ppe_size; + u8 mcs_nss_size; + u8 he_total_size; + + memset(he_cap, 0, sizeof(*he_cap)); + + if (!he_cap_ie || !ieee80211_get_he_sta_cap(sband)) + return; + + /* Make sure size is OK */ + mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem); + he_ppe_size = + ieee80211_he_ppe_size(he_cap_ie[sizeof(he_cap->he_cap_elem) + + mcs_nss_size], + he_cap_ie_elem->phy_cap_info); + he_total_size = sizeof(he_cap->he_cap_elem) + mcs_nss_size + + he_ppe_size; + if (he_cap_len < he_total_size) + return; + + memcpy(&he_cap->he_cap_elem, he_cap_ie, sizeof(he_cap->he_cap_elem)); + + /* HE Tx/Rx HE MCS NSS Support Field */ + memcpy(&he_cap->he_mcs_nss_supp, + &he_cap_ie[sizeof(he_cap->he_cap_elem)], mcs_nss_size); + + /* Check if there are (optional) PPE Thresholds */ + if (he_cap->he_cap_elem.phy_cap_info[6] & + IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) + memcpy(he_cap->ppe_thres, + &he_cap_ie[sizeof(he_cap->he_cap_elem) + mcs_nss_size], + he_ppe_size); + + he_cap->has_he = true; +} diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 26a7ba3b698f..f849ea814993 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -352,7 +352,7 @@ void ieee80211_ba_session_work(struct work_struct *work) test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl)) ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, - IEEE80211_MAX_AMPDU_BUF, + IEEE80211_MAX_AMPDU_BUF_HT, false, true); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d1978aa1c15d..172aeae21ae9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -165,6 +165,7 @@ typedef unsigned __bitwise ieee80211_tx_result; #define TX_DROP ((__force ieee80211_tx_result) 1u) #define TX_QUEUED ((__force ieee80211_tx_result) 2u) +#define IEEE80211_TX_NO_SEQNO BIT(0) #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) @@ -364,6 +365,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_DISABLE_160MHZ = BIT(13), IEEE80211_STA_DISABLE_WMM = BIT(14), IEEE80211_STA_ENABLE_RRM = BIT(15), + IEEE80211_STA_DISABLE_HE = BIT(16), }; struct ieee80211_mgd_auth_data { @@ -1453,6 +1455,10 @@ struct ieee802_11_elems { const struct ieee80211_vht_cap *vht_cap_elem; const struct ieee80211_vht_operation *vht_operation; const struct ieee80211_meshconf_ie *mesh_config; + const u8 *he_cap; + const struct ieee80211_he_operation *he_operation; + const struct ieee80211_mu_edca_param_set *mu_edca_param_set; + const u8 *uora_element; const u8 *mesh_id; const u8 *peering; const __le16 *awake_window; @@ -1482,6 +1488,7 @@ struct ieee802_11_elems { u8 ext_supp_rates_len; u8 wmm_info_len; u8 wmm_param_len; + u8 he_cap_len; u8 mesh_id_len; u8 peering_len; u8 preq_len; @@ -1824,6 +1831,13 @@ void ieee80211_get_vht_mask_from_cap(__le16 vht_cap, enum nl80211_chan_width ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta); +/* HE */ +void +ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const u8 *he_cap_ie, u8 he_cap_len, + struct sta_info *sta); + /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, @@ -1880,19 +1894,20 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, bool bss_notify, bool enable_qos); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, struct sk_buff *skb); + struct sta_info *sta, struct sk_buff *skb, + u32 txdata_flags); void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum nl80211_band band); + enum nl80211_band band, u32 txdata_flags); static inline void ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum nl80211_band band) + enum nl80211_band band, u32 txdata_flags) { rcu_read_lock(); - __ieee80211_tx_skb_tid_band(sdata, skb, tid, band); + __ieee80211_tx_skb_tid_band(sdata, skb, tid, band, txdata_flags); rcu_read_unlock(); } @@ -1910,7 +1925,7 @@ static inline void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, } __ieee80211_tx_skb_tid_band(sdata, skb, tid, - chanctx_conf->def.chan->band); + chanctx_conf->def.chan->band, 0); rcu_read_unlock(); } @@ -2031,26 +2046,27 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *bssid, u16 stype, u16 reason, bool send_frame, u8 *frame_buf); + +enum { + IEEE80211_PROBE_FLAG_DIRECTED = BIT(0), + IEEE80211_PROBE_FLAG_MIN_CONTENT = BIT(1), + IEEE80211_PROBE_FLAG_RANDOM_SN = BIT(2), +}; + int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, size_t buffer_len, struct ieee80211_scan_ies *ie_desc, const u8 *ie, size_t ie_len, u8 bands_used, u32 *rate_masks, - struct cfg80211_chan_def *chandef); + struct cfg80211_chan_def *chandef, + u32 flags); struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, const u8 *src, const u8 *dst, u32 ratemask, struct ieee80211_channel *chan, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - bool directed); -void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, - const u8 *src, const u8 *dst, - const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, u32 tx_flags, - struct ieee80211_channel *channel, bool scan); - + u32 flags); u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band band, u32 *basic_rates); @@ -2073,6 +2089,9 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); +u8 *ieee80211_ie_build_he_cap(u8 *pos, + const struct ieee80211_sta_he_cap *he_cap, + u8 *end); int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fb73451ed85e..4fb2709cb527 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -3,6 +3,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (C) 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -557,10 +558,19 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211); - if (!ops->hw_scan) + if (!ops->hw_scan) { wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | NL80211_FEATURE_AP_SCAN; - + /* + * if the driver behaves correctly using the probe request + * (template) from mac80211, then both of these should be + * supported even with hw scan - but let drivers opt in. + */ + wiphy_ext_feature_set(wiphy, + NL80211_EXT_FEATURE_SCAN_RANDOM_SN); + wiphy_ext_feature_set(wiphy, + NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT); + } if (!ops->set_key) wiphy->flags |= WIPHY_FLAG_IBSS_RSN; @@ -588,8 +598,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, local->hw.queues = 1; local->hw.max_rates = 1; local->hw.max_report_rates = 0; - local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; - local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; + local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT; + local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT; local->hw.offchannel_tx_hw_queue = IEEE80211_INVAL_HW_QUEUE; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; @@ -816,7 +826,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) int result, i; enum nl80211_band band; int channels, max_bitrates; - bool supp_ht, supp_vht; + bool supp_ht, supp_vht, supp_he; netdev_features_t feature_whitelist; struct cfg80211_chan_def dflt_chandef = {}; @@ -896,6 +906,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) max_bitrates = 0; supp_ht = false; supp_vht = false; + supp_he = false; for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; @@ -922,6 +933,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; + if (!supp_he) + supp_he = !!ieee80211_get_he_sta_cap(sband); + if (!sband->ht_cap.ht_supported) continue; @@ -1011,6 +1025,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->scan_ies_len += 2 + sizeof(struct ieee80211_vht_cap); + /* HE cap element is variable in size - set len to allow max size */ + /* + * TODO: 1 is added at the end of the calculation to accommodate for + * the temporary placing of the HE capabilities IE under EXT. + * Remove it once it is placed in the final place. + */ + if (supp_he) + local->scan_ies_len += + 2 + sizeof(struct ieee80211_he_cap_elem) + + sizeof(struct ieee80211_he_mcs_nss_supp) + + IEEE80211_HE_PPE_THRES_MAX_LEN + 1; + if (!local->ops->hw_scan) { /* For hw_scan, driver needs to set these up. */ local->hw.wiphy->max_scan_ssids = 4; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a59187c016e0..7fb9957359a3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -149,6 +149,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, + const struct ieee80211_he_operation *he_oper, struct cfg80211_chan_def *chandef, bool tracking) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -207,7 +208,27 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } vht_chandef = *chandef; - if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) { + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && he_oper && + (le32_to_cpu(he_oper->he_oper_params) & + IEEE80211_HE_OPERATION_VHT_OPER_INFO)) { + struct ieee80211_vht_operation he_oper_vht_cap; + + /* + * Set only first 3 bytes (other 2 aren't used in + * ieee80211_chandef_vht_oper() anyway) + */ + memcpy(&he_oper_vht_cap, he_oper->optional, 3); + he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0); + + if (!ieee80211_chandef_vht_oper(&he_oper_vht_cap, + &vht_chandef)) { + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) + sdata_info(sdata, + "HE AP VHT information is invalid, disable HE\n"); + ret = IEEE80211_STA_DISABLE_HE; + goto out; + } + } else if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information is invalid, disable VHT\n"); @@ -300,12 +321,14 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, const struct ieee80211_ht_cap *ht_cap, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, + const struct ieee80211_he_operation *he_oper, const u8 *bssid, u32 *changed) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; + struct ieee80211_channel *chan = sdata->vif.bss_conf.chandef.chan; + struct ieee80211_supported_band *sband = + local->hw.wiphy->bands[chan->band]; struct cfg80211_chan_def chandef; u16 ht_opmode; u32 flags; @@ -320,6 +343,11 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) vht_oper = NULL; + /* don't check HE if we associated as non-HE station */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_HE || + !ieee80211_get_he_sta_cap(sband)) + he_oper = NULL; + if (WARN_ON_ONCE(!sta)) return -EINVAL; @@ -333,12 +361,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.ht_operation_mode = ht_opmode; } - chan = sdata->vif.bss_conf.chandef.chan; - sband = local->hw.wiphy->bands[chan->band]; - - /* calculate new channel (type) based on HT/VHT operation IEs */ + /* calculate new channel (type) based on HT/VHT/HE operation IEs */ flags = ieee80211_determine_chantype(sdata, sband, chan, - ht_oper, vht_oper, + ht_oper, vht_oper, he_oper, &chandef, true); /* @@ -582,6 +607,34 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); } +/* This function determines HE capability flags for the association + * and builds the IE. + */ +static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct ieee80211_supported_band *sband) +{ + u8 *pos; + const struct ieee80211_sta_he_cap *he_cap = NULL; + u8 he_cap_size; + + he_cap = ieee80211_get_he_sta_cap(sband); + if (!he_cap) + return; + + /* + * TODO: the 1 added is because this temporarily is under the EXTENSION + * IE. Get rid of it when it moves. + */ + he_cap_size = + 2 + 1 + sizeof(he_cap->he_cap_elem) + + ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + + ieee80211_he_ppe_size(he_cap->ppe_thres[0], + he_cap->he_cap_elem.phy_cap_info); + pos = skb_put(skb, he_cap_size); + ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size); +} + static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -643,6 +696,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) 2 + 2 * sband->n_channels + /* supported channels */ 2 + sizeof(struct ieee80211_ht_cap) + /* HT */ 2 + sizeof(struct ieee80211_vht_cap) + /* VHT */ + 2 + 1 + sizeof(struct ieee80211_he_cap_elem) + /* HE */ + sizeof(struct ieee80211_he_mcs_nss_supp) + + IEEE80211_HE_PPE_THRES_MAX_LEN + assoc_data->ie_len + /* extra IEs */ (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) + 9, /* WMM */ @@ -827,11 +883,41 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) offset = noffset; } + /* if present, add any custom IEs that go before HE */ + if (assoc_data->ie_len) { + static const u8 before_he[] = { + /* + * no need to list the ones split off before VHT + * or generated here + */ + WLAN_EID_OPMODE_NOTIF, + WLAN_EID_EXTENSION, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE, + /* 11ai elements */ + WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_SESSION, + WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_PUBLIC_KEY, + WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_KEY_CONFIRM, + WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_HLP_CONTAINER, + WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN, + /* TODO: add 11ah/11aj/11ak elements */ + }; + + /* RIC already taken above, so no need to handle here anymore */ + noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len, + before_he, ARRAY_SIZE(before_he), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, assoc_data->ie + offset, noffset - offset); + offset = noffset; + } + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) ieee80211_add_vht_ie(sdata, skb, sband, &assoc_data->ap_vht_cap); - /* if present, add any custom non-vendor IEs that go after HT */ + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) + ieee80211_add_he_ie(sdata, skb, sband); + + /* if present, add any custom non-vendor IEs that go after HE */ if (assoc_data->ie_len) { noffset = ieee80211_ie_split_vendor(assoc_data->ie, assoc_data->ie_len, @@ -898,6 +984,11 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + /* Don't send NDPs when STA is connected HE */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) + return; + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP)); if (!skb) @@ -929,6 +1020,10 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) return; + /* Don't send NDPs when connected HE */ + if (!(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE)) + return; + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30); if (!skb) return; @@ -1700,9 +1795,11 @@ static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work) } /* MLME */ -static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - const u8 *wmm_param, size_t wmm_param_len) +static bool +ieee80211_sta_wmm_params(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const u8 *wmm_param, size_t wmm_param_len, + const struct ieee80211_mu_edca_param_set *mu_edca) { struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS]; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1749,6 +1846,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, sdata->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) uapsd = true; + params[ac].mu_edca = !!mu_edca; + if (mu_edca) + params[ac].mu_edca_param_rec = mu_edca->ac_bk; break; case 2: /* AC_VI */ ac = IEEE80211_AC_VI; @@ -1756,6 +1856,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, sdata->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) uapsd = true; + params[ac].mu_edca = !!mu_edca; + if (mu_edca) + params[ac].mu_edca_param_rec = mu_edca->ac_vi; break; case 3: /* AC_VO */ ac = IEEE80211_AC_VO; @@ -1763,6 +1866,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, sdata->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) uapsd = true; + params[ac].mu_edca = !!mu_edca; + if (mu_edca) + params[ac].mu_edca_param_rec = mu_edca->ac_vo; break; case 0: /* AC_BE */ default: @@ -1771,6 +1877,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, sdata->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) uapsd = true; + params[ac].mu_edca = !!mu_edca; + if (mu_edca) + params[ac].mu_edca_param_rec = mu_edca->ac_be; break; } @@ -2219,6 +2328,20 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, ieee80211_sta_reset_conn_monitor(sdata); } +static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata, + const u8 *src, const u8 *dst, + const u8 *ssid, size_t ssid_len, + struct ieee80211_channel *channel) +{ + struct sk_buff *skb; + + skb = ieee80211_build_probe_req(sdata, src, dst, (u32)-1, channel, + ssid, ssid_len, NULL, 0, + IEEE80211_PROBE_FLAG_DIRECTED); + if (skb) + ieee80211_tx_skb(sdata, skb); +} + static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -2265,10 +2388,9 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) else ssid_len = ssid[1]; - ieee80211_send_probe_req(sdata, sdata->vif.addr, dst, - ssid + 2, ssid_len, NULL, - 0, (u32) -1, true, 0, - ifmgd->associated->channel, false); + ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst, + ssid + 2, ssid_len, + ifmgd->associated->channel); rcu_read_unlock(); } @@ -2370,7 +2492,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, skb = ieee80211_build_probe_req(sdata, sdata->vif.addr, cbss->bssid, (u32) -1, cbss->channel, ssid + 2, ssid_len, - NULL, 0, true); + NULL, 0, IEEE80211_PROBE_FLAG_DIRECTED); rcu_read_unlock(); return skb; @@ -3008,6 +3130,25 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, goto out; } + /* + * If AP doesn't support HT, or it doesn't have HE mandatory IEs, mark + * HE as disabled. If on the 5GHz band, make sure it supports VHT. + */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_HT || + (sband->band == NL80211_BAND_5GHZ && + ifmgd->flags & IEEE80211_STA_DISABLE_VHT) || + (!elems.he_cap && !elems.he_operation)) + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; + + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && + (!elems.he_cap || !elems.he_operation)) { + mutex_unlock(&sdata->local->sta_mtx); + sdata_info(sdata, + "HE AP is missing HE capability/operation\n"); + ret = false; + goto out; + } + /* Set up internal HT/VHT capabilities */ if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, @@ -3017,6 +3158,48 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, elems.vht_cap_elem, sta); + if (elems.he_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && + elems.he_cap) { + ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, + elems.he_cap, + elems.he_cap_len, + sta); + + bss_conf->he_support = sta->sta.he_cap.has_he; + } else { + bss_conf->he_support = false; + } + + if (bss_conf->he_support) { + u32 he_oper_params = + le32_to_cpu(elems.he_operation->he_oper_params); + + bss_conf->bss_color = he_oper_params & + IEEE80211_HE_OPERATION_BSS_COLOR_MASK; + bss_conf->htc_trig_based_pkt_ext = + (he_oper_params & + IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK) << + IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET; + bss_conf->frame_time_rts_th = + (he_oper_params & + IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK) << + IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET; + + bss_conf->multi_sta_back_32bit = + sta->sta.he_cap.he_cap_elem.mac_cap_info[2] & + IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP; + + bss_conf->ack_enabled = + sta->sta.he_cap.he_cap_elem.mac_cap_info[2] & + IEEE80211_HE_MAC_CAP2_ACK_EN; + + bss_conf->uora_exists = !!elems.uora_element; + if (elems.uora_element) + bss_conf->uora_ocw_range = elems.uora_element[0]; + + /* TODO: OPEN: what happens if BSS color disable is set? */ + } + /* * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data * in their association response, so ignore that data for our own @@ -3076,7 +3259,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) { ieee80211_set_wmm_default(sdata, false, false); } else if (!ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, - elems.wmm_param_len)) { + elems.wmm_param_len, + elems.mu_edca_param_set)) { /* still enable QoS since we might have HT/VHT */ ieee80211_set_wmm_default(sdata, false, true); /* set the disable-WMM flag in this case to disable @@ -3590,7 +3774,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) && ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, - elems.wmm_param_len)) + elems.wmm_param_len, + elems.mu_edca_param_set)) changed |= BSS_CHANGED_QOS; /* @@ -3629,7 +3814,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ieee80211_config_bw(sdata, sta, elems.ht_cap_elem, elems.ht_operation, - elems.vht_operation, bssid, &changed)) { + elems.vht_operation, elems.he_operation, + bssid, &changed)) { mutex_unlock(&local->sta_mtx); sdata_info(sdata, "failed to follow AP %pM bandwidth change, disconnect\n", @@ -4266,6 +4452,68 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, return chains; } +static bool +ieee80211_verify_sta_he_mcs_support(struct ieee80211_supported_band *sband, + const struct ieee80211_he_operation *he_op) +{ + const struct ieee80211_sta_he_cap *sta_he_cap = + ieee80211_get_he_sta_cap(sband); + u16 ap_min_req_set; + int i; + + if (!sta_he_cap || !he_op) + return false; + + ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set); + + /* Need to go over for 80MHz, 160MHz and for 80+80 */ + for (i = 0; i < 3; i++) { + const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp = + &sta_he_cap->he_mcs_nss_supp; + u16 sta_mcs_map_rx = + le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]); + u16 sta_mcs_map_tx = + le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]); + u8 nss; + bool verified = true; + + /* + * For each band there is a maximum of 8 spatial streams + * possible. Each of the sta_mcs_map_* is a 16-bit struct built + * of 2 bits per NSS (1-8), with the values defined in enum + * ieee80211_he_mcs_support. Need to make sure STA TX and RX + * capabilities aren't less than the AP's minimum requirements + * for this HE BSS per SS. + * It is enough to find one such band that meets the reqs. + */ + for (nss = 8; nss > 0; nss--) { + u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3; + u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3; + u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3; + + if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED) + continue; + + /* + * Make sure the HE AP doesn't require MCSs that aren't + * supported by the client + */ + if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || + sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || + (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) { + verified = false; + break; + } + } + + if (verified) + return true; + } + + /* If here, STA doesn't meet AP's HE min requirements */ + return false; +} + static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss) { @@ -4274,6 +4522,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, const struct ieee80211_ht_cap *ht_cap = NULL; const struct ieee80211_ht_operation *ht_oper = NULL; const struct ieee80211_vht_operation *vht_oper = NULL; + const struct ieee80211_he_operation *he_oper = NULL; struct ieee80211_supported_band *sband; struct cfg80211_chan_def chandef; int ret; @@ -4329,6 +4578,24 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } } + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && + ieee80211_get_he_sta_cap(sband)) { + const struct cfg80211_bss_ies *ies; + const u8 *he_oper_ie; + + ies = rcu_dereference(cbss->ies); + he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, + ies->data, ies->len); + if (he_oper_ie && + he_oper_ie[1] == ieee80211_he_oper_size(&he_oper_ie[3])) + he_oper = (void *)(he_oper_ie + 3); + else + he_oper = NULL; + + if (!ieee80211_verify_sta_he_mcs_support(sband, he_oper)) + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; + } + /* Allow VHT if at least one channel on the sband supports 80 MHz */ have_80mhz = false; for (i = 0; i < sband->n_channels; i++) { @@ -4345,7 +4612,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, - ht_oper, vht_oper, + ht_oper, vht_oper, he_oper, &chandef, false); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), @@ -4751,8 +5018,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) { ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; netdev_info(sdata->dev, - "disabling HT/VHT due to WEP/TKIP use\n"); + "disabling HE/HT/VHT due to WEP/TKIP use\n"); } } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index f1d40b6645ff..8ef4153cd299 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -262,7 +262,7 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc, if (roc->mgmt_tx_cookie) { if (!WARN_ON(!roc->frame)) { ieee80211_tx_skb_tid_band(roc->sdata, roc->frame, 7, - roc->chan->band); + roc->chan->band, 0); roc->frame = NULL; } } else { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0a38cc1cbebc..a16ba568e2a3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -175,6 +175,20 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 12; } + if (status->encoding == RX_ENC_HE && + status->flag & RX_FLAG_RADIOTAP_HE) { + len = ALIGN(len, 2); + len += 12; + BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he) != 12); + } + + if (status->encoding == RX_ENC_HE && + status->flag & RX_FLAG_RADIOTAP_HE_MU) { + len = ALIGN(len, 2); + len += 12; + BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he_mu) != 12); + } + if (status->chains) { /* antenna and antenna signal fields */ len += 2 * hweight8(status->chains); @@ -263,6 +277,19 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, int mpdulen, chain; unsigned long chains = status->chains; struct ieee80211_vendor_radiotap rtap = {}; + struct ieee80211_radiotap_he he = {}; + struct ieee80211_radiotap_he_mu he_mu = {}; + + if (status->flag & RX_FLAG_RADIOTAP_HE) { + he = *(struct ieee80211_radiotap_he *)skb->data; + skb_pull(skb, sizeof(he)); + WARN_ON_ONCE(status->encoding != RX_ENC_HE); + } + + if (status->flag & RX_FLAG_RADIOTAP_HE_MU) { + he_mu = *(struct ieee80211_radiotap_he_mu *)skb->data; + skb_pull(skb, sizeof(he_mu)); + } if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { rtap = *(struct ieee80211_vendor_radiotap *)skb->data; @@ -520,6 +547,89 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos++ = flags; } + if (status->encoding == RX_ENC_HE && + status->flag & RX_FLAG_RADIOTAP_HE) { +#define HE_PREP(f, val) cpu_to_le16(FIELD_PREP(IEEE80211_RADIOTAP_HE_##f, val)) + + if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) { + he.data6 |= HE_PREP(DATA6_NSTS, + FIELD_GET(RX_ENC_FLAG_STBC_MASK, + status->enc_flags)); + he.data3 |= HE_PREP(DATA3_STBC, 1); + } else { + he.data6 |= HE_PREP(DATA6_NSTS, status->nss); + } + +#define CHECK_GI(s) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \ + (int)NL80211_RATE_INFO_HE_GI_##s) + + CHECK_GI(0_8); + CHECK_GI(1_6); + CHECK_GI(3_2); + + he.data3 |= HE_PREP(DATA3_DATA_MCS, status->rate_idx); + he.data3 |= HE_PREP(DATA3_DATA_DCM, status->he_dcm); + he.data3 |= HE_PREP(DATA3_CODING, + !!(status->enc_flags & RX_ENC_FLAG_LDPC)); + + he.data5 |= HE_PREP(DATA5_GI, status->he_gi); + + switch (status->bw) { + case RATE_INFO_BW_20: + he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ); + break; + case RATE_INFO_BW_40: + he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ); + break; + case RATE_INFO_BW_80: + he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ); + break; + case RATE_INFO_BW_160: + he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ); + break; + case RATE_INFO_BW_HE_RU: +#define CHECK_RU_ALLOC(s) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \ + NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4) + + CHECK_RU_ALLOC(26); + CHECK_RU_ALLOC(52); + CHECK_RU_ALLOC(106); + CHECK_RU_ALLOC(242); + CHECK_RU_ALLOC(484); + CHECK_RU_ALLOC(996); + CHECK_RU_ALLOC(2x996); + + he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + status->he_ru + 4); + break; + default: + WARN_ONCE(1, "Invalid SU BW %d\n", status->bw); + } + + /* ensure 2 byte alignment */ + while ((pos - (u8 *)rthdr) & 1) + pos++; + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE); + memcpy(pos, &he, sizeof(he)); + pos += sizeof(he); + } + + if (status->encoding == RX_ENC_HE && + status->flag & RX_FLAG_RADIOTAP_HE_MU) { + /* ensure 2 byte alignment */ + while ((pos - (u8 *)rthdr) & 1) + pos++; + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE_MU); + memcpy(pos, &he_mu, sizeof(he_mu)); + pos += sizeof(he_mu); + } + for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) { *pos++ = status->chain_signal[chain]; *pos++ = chain; @@ -613,6 +723,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, rcu_dereference(local->monitor_sdata); bool only_monitor = false; + if (status->flag & RX_FLAG_RADIOTAP_HE) + rtap_space += sizeof(struct ieee80211_radiotap_he); + + if (status->flag & RX_FLAG_RADIOTAP_HE_MU) + rtap_space += sizeof(struct ieee80211_radiotap_he_mu); + if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) { struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data; @@ -3241,7 +3357,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) } __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, - status->band); + status->band, 0); } dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -3386,8 +3502,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, status = IEEE80211_SKB_RXCB((rx->skb)); sband = rx->local->hw.wiphy->bands[status->band]; - if (!(status->encoding == RX_ENC_HT) && - !(status->encoding == RX_ENC_VHT)) + if (status->encoding == RX_ENC_LEGACY) rate = &sband->bitrates[status->rate_idx]; ieee80211_rx_cooked_monitor(rx, rate); @@ -4386,6 +4501,14 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, status->rate_idx, status->nss)) goto drop; break; + case RX_ENC_HE: + if (WARN_ONCE(status->rate_idx > 11 || + !status->nss || + status->nss > 8, + "Rate marked as an HE rate but data is invalid: MCS: %d, NSS: %d\n", + status->rate_idx, status->nss)) + goto drop; + break; default: WARN_ON_ONCE(1); /* fall through */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 2e917a6d239d..5d2a11777718 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -20,6 +20,7 @@ #include <net/sch_generic.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/random.h> #include <net/mac80211.h> #include "ieee80211_i.h" @@ -293,6 +294,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) struct cfg80211_chan_def chandef; u8 bands_used = 0; int i, ielen, n_chans; + u32 flags = 0; req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->mtx)); @@ -331,12 +333,16 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) local->hw_scan_req->req.n_channels = n_chans; ieee80211_prepare_scan_chandef(&chandef, req->scan_width); + if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) + flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; + ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->req.ie, local->hw_scan_ies_bufsize, &local->hw_scan_req->ies, req->ie, req->ie_len, - bands_used, req->rates, &chandef); + bands_used, req->rates, &chandef, + flags); local->hw_scan_req->req.ie_len = ielen; local->hw_scan_req->req.no_cck = req->no_cck; ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr); @@ -528,6 +534,35 @@ void ieee80211_run_deferred_scan(struct ieee80211_local *local) round_jiffies_relative(0)); } +static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, + const u8 *src, const u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len, + u32 ratemask, u32 flags, u32 tx_flags, + struct ieee80211_channel *channel) +{ + struct sk_buff *skb; + u32 txdata_flags = 0; + + skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel, + ssid, ssid_len, + ie, ie_len, flags); + + if (skb) { + if (flags & IEEE80211_PROBE_FLAG_RANDOM_SN) { + struct ieee80211_hdr *hdr = (void *)skb->data; + u16 sn = get_random_u32(); + + txdata_flags |= IEEE80211_TX_NO_SEQNO; + hdr->seq_ctrl = + cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); + } + IEEE80211_SKB_CB(skb)->flags |= tx_flags; + ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band, + txdata_flags); + } +} + static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, unsigned long *next_delay) { @@ -535,7 +570,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; struct cfg80211_scan_request *scan_req; enum nl80211_band band = local->hw.conf.chandef.chan->band; - u32 tx_flags; + u32 flags = 0, tx_flags; scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->mtx)); @@ -543,17 +578,21 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (scan_req->no_cck) tx_flags |= IEEE80211_TX_CTL_NO_CCK_RATE; + if (scan_req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) + flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; + if (scan_req->flags & NL80211_SCAN_FLAG_RANDOM_SN) + flags |= IEEE80211_PROBE_FLAG_RANDOM_SN; sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->mtx)); for (i = 0; i < scan_req->n_ssids; i++) - ieee80211_send_probe_req( + ieee80211_send_scan_probe_req( sdata, local->scan_addr, scan_req->bssid, scan_req->ssids[i].ssid, scan_req->ssids[i].ssid_len, scan_req->ie, scan_req->ie_len, - scan_req->rates[band], false, - tx_flags, local->hw.conf.chandef.chan, true); + scan_req->rates[band], flags, + tx_flags, local->hw.conf.chandef.chan); /* * After sending probe requests, wait for probe responses @@ -1141,6 +1180,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, u32 rate_masks[NUM_NL80211_BANDS] = {}; u8 bands_used = 0; u8 *ie; + u32 flags = 0; iebufsz = local->scan_ies_len + req->ie_len; @@ -1157,6 +1197,9 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, } } + if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) + flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; + ie = kcalloc(iebufsz, num_bands, GFP_KERNEL); if (!ie) { ret = -ENOMEM; @@ -1167,7 +1210,8 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, ieee80211_build_preq_ies(local, ie, num_bands * iebufsz, &sched_scan_ies, req->ie, - req->ie_len, bands_used, rate_masks, &chandef); + req->ie_len, bands_used, rate_masks, &chandef, + flags); ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); if (ret == 0) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 6428f1ac37b6..f34202242d24 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1323,6 +1323,11 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid, struct ieee80211_tx_info *info; struct ieee80211_chanctx_conf *chanctx_conf; + /* Don't send NDPs when STA is connected HE */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE)) + return; + if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | @@ -1391,7 +1396,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid, } info->band = chanctx_conf->def.chan->band; - ieee80211_xmit(sdata, sta, skb); + ieee80211_xmit(sdata, sta, skb, 0); rcu_read_unlock(); } @@ -1968,7 +1973,7 @@ sta_get_last_rx_stats(struct sta_info *sta) return stats; } -static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, +static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate, struct rate_info *rinfo) { rinfo->bw = STA_STATS_GET(BW, rate); @@ -2005,6 +2010,14 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); break; } + case STA_STATS_RATE_TYPE_HE: + rinfo->flags = RATE_INFO_FLAGS_HE_MCS; + rinfo->mcs = STA_STATS_GET(HE_MCS, rate); + rinfo->nss = STA_STATS_GET(HE_NSS, rate); + rinfo->he_gi = STA_STATS_GET(HE_GI, rate); + rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate); + rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate); + break; } } @@ -2101,38 +2114,38 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, drv_sta_statistics(local, sdata, &sta->sta, sinfo); - sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) | - BIT(NL80211_STA_INFO_STA_FLAGS) | - BIT(NL80211_STA_INFO_BSS_PARAM) | - BIT(NL80211_STA_INFO_CONNECTED_TIME) | - BIT(NL80211_STA_INFO_RX_DROP_MISC); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) | + BIT_ULL(NL80211_STA_INFO_STA_FLAGS) | + BIT_ULL(NL80211_STA_INFO_BSS_PARAM) | + BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) | + BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sinfo->beacon_loss_count = sdata->u.mgd.beacon_loss_count; - sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_LOSS); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS); } sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->inactive_time = jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); - if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | - BIT(NL80211_STA_INFO_TX_BYTES)))) { + if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | + BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_bytes += sta->tx_stats.bytes[ac]; - sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64); } - if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) { + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_packets += sta->tx_stats.packets[ac]; - sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); } - if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | - BIT(NL80211_STA_INFO_RX_BYTES)))) { + if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | + BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); if (sta->pcpu_rx_stats) { @@ -2144,10 +2157,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } } - sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64); } - if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) { sinfo->rx_packets = sta->rx_stats.packets; if (sta->pcpu_rx_stats) { for_each_possible_cpu(cpu) { @@ -2157,17 +2170,17 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->rx_packets += cpurxs->packets; } } - sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS); } - if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) { + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) { sinfo->tx_retries = sta->status_stats.retry_count; - sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); } - if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) { + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) { sinfo->tx_failed = sta->status_stats.retry_failed; - sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } sinfo->rx_dropped_misc = sta->rx_stats.dropped; @@ -2182,23 +2195,23 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { - sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_RX) | - BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) | + BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG); sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); } if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { - if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)last_rxstats->last_signal; - sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); } if (!sta->pcpu_rx_stats && - !(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { + !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = -ewma_signal_read(&sta->rx_stats_avg.signal); - sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); } } @@ -2207,11 +2220,11 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, * pcpu statistics */ if (last_rxstats->chains && - !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | - BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { - sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL); + !(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); if (!sta->pcpu_rx_stats) - sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = last_rxstats->chains; @@ -2223,15 +2236,15 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } } - if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) { + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &sinfo->txrate); - sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); } - if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) { + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE))) { if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0) - sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); } if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) { @@ -2244,18 +2257,18 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - sinfo->filled |= BIT(NL80211_STA_INFO_LLID) | - BIT(NL80211_STA_INFO_PLID) | - BIT(NL80211_STA_INFO_PLINK_STATE) | - BIT(NL80211_STA_INFO_LOCAL_PM) | - BIT(NL80211_STA_INFO_PEER_PM) | - BIT(NL80211_STA_INFO_NONPEER_PM); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_LLID) | + BIT_ULL(NL80211_STA_INFO_PLID) | + BIT_ULL(NL80211_STA_INFO_PLINK_STATE) | + BIT_ULL(NL80211_STA_INFO_LOCAL_PM) | + BIT_ULL(NL80211_STA_INFO_PEER_PM) | + BIT_ULL(NL80211_STA_INFO_NONPEER_PM); sinfo->llid = sta->mesh->llid; sinfo->plid = sta->mesh->plid; sinfo->plink_state = sta->mesh->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_T_OFFSET); sinfo->t_offset = sta->mesh->t_offset; } sinfo->local_pm = sta->mesh->local_pm; @@ -2300,7 +2313,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, thr = sta_get_expected_throughput(sta); if (thr != 0) { - sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 81b35f623792..9a04327d71d1 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -170,7 +170,7 @@ struct tid_ampdu_tx { u8 dialog_token; u8 stop_initiator; bool tx_stop; - u8 buf_size; + u16 buf_size; u16 failed_bar_ssn; bool bar_pending; @@ -405,7 +405,7 @@ struct ieee80211_sta_rx_stats { int last_signal; u8 chains; s8 chain_signal_last[IEEE80211_MAX_CHAINS]; - u16 last_rate; + u32 last_rate; struct u64_stats_sync syncp; u64 bytes; u64 msdu[IEEE80211_NUM_TIDS + 1]; @@ -764,6 +764,7 @@ enum sta_stats_type { STA_STATS_RATE_TYPE_LEGACY, STA_STATS_RATE_TYPE_HT, STA_STATS_RATE_TYPE_VHT, + STA_STATS_RATE_TYPE_HE, }; #define STA_STATS_FIELD_HT_MCS GENMASK( 7, 0) @@ -771,9 +772,14 @@ enum sta_stats_type { #define STA_STATS_FIELD_LEGACY_BAND GENMASK( 7, 4) #define STA_STATS_FIELD_VHT_MCS GENMASK( 3, 0) #define STA_STATS_FIELD_VHT_NSS GENMASK( 7, 4) +#define STA_STATS_FIELD_HE_MCS GENMASK( 3, 0) +#define STA_STATS_FIELD_HE_NSS GENMASK( 7, 4) #define STA_STATS_FIELD_BW GENMASK(11, 8) #define STA_STATS_FIELD_SGI GENMASK(12, 12) #define STA_STATS_FIELD_TYPE GENMASK(15, 13) +#define STA_STATS_FIELD_HE_RU GENMASK(18, 16) +#define STA_STATS_FIELD_HE_GI GENMASK(20, 19) +#define STA_STATS_FIELD_HE_DCM GENMASK(21, 21) #define STA_STATS_FIELD(_n, _v) FIELD_PREP(STA_STATS_FIELD_ ## _n, _v) #define STA_STATS_GET(_n, _v) FIELD_GET(STA_STATS_FIELD_ ## _n, _v) @@ -782,7 +788,7 @@ enum sta_stats_type { static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s) { - u16 r; + u32 r; r = STA_STATS_FIELD(BW, s->bw); @@ -804,6 +810,14 @@ static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s) r |= STA_STATS_FIELD(LEGACY_BAND, s->band); r |= STA_STATS_FIELD(LEGACY_IDX, s->rate_idx); break; + case RX_ENC_HE: + r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_HE); + r |= STA_STATS_FIELD(HE_NSS, s->nss); + r |= STA_STATS_FIELD(HE_MCS, s->rate_idx); + r |= STA_STATS_FIELD(HE_GI, s->he_gi); + r |= STA_STATS_FIELD(HE_RU, s->he_ru); + r |= STA_STATS_FIELD(HE_DCM, s->he_dcm); + break; default: WARN_ON(1); return STA_STATS_RATE_INVALID; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 80a7edf8d314..0ab69a1964f8 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -92,7 +92,7 @@ STA_ENTRY \ __field(u16, tid) \ __field(u16, ssn) \ - __field(u8, buf_size) \ + __field(u16, buf_size) \ __field(bool, amsdu) \ __field(u16, timeout) \ __field(u16, action) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fa1f1e63a264..6a79d564de35 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -825,6 +825,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) */ if (!ieee80211_is_data_qos(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) { + if (tx->flags & IEEE80211_TX_NO_SEQNO) + return TX_CONTINUE; /* driver should assign sequence number */ info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; /* for pure STA mode without beacons, we can do it */ @@ -1854,7 +1856,7 @@ EXPORT_SYMBOL(ieee80211_tx_prepare_skb); */ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb, - bool txpending) + bool txpending, u32 txdata_flags) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_data tx; @@ -1872,6 +1874,8 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, led_len = skb->len; res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb); + tx.flags |= txdata_flags; + if (unlikely(res_prepare == TX_DROP)) { ieee80211_free_txskb(&local->hw, skb); return true; @@ -1933,7 +1937,8 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, } void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, struct sk_buff *skb) + struct sta_info *sta, struct sk_buff *skb, + u32 txdata_flags) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -1968,7 +1973,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, } ieee80211_set_qos_hdr(sdata, skb); - ieee80211_tx(sdata, sta, skb, false); + ieee80211_tx(sdata, sta, skb, false, txdata_flags); } static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, @@ -2289,7 +2294,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, if (!ieee80211_parse_tx_radiotap(local, skb)) goto fail_rcu; - ieee80211_xmit(sdata, NULL, skb); + ieee80211_xmit(sdata, NULL, skb, 0); rcu_read_unlock(); return NETDEV_TX_OK; @@ -3648,7 +3653,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, ieee80211_tx_stats(dev, skb->len); - ieee80211_xmit(sdata, sta, skb); + ieee80211_xmit(sdata, sta, skb, 0); } goto out; out_free: @@ -3867,7 +3872,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, return true; } info->band = chanctx_conf->def.chan->band; - result = ieee80211_tx(sdata, NULL, skb, true); + result = ieee80211_tx(sdata, NULL, skb, true, 0); } else { struct sk_buff_head skbs; @@ -4783,7 +4788,7 @@ EXPORT_SYMBOL(ieee80211_unreserve_tid); void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum nl80211_band band) + enum nl80211_band band, u32 txdata_flags) { int ac = ieee80211_ac_from_tid(tid); @@ -4800,7 +4805,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, */ local_bh_disable(); IEEE80211_SKB_CB(skb)->band = band; - ieee80211_xmit(sdata, NULL, skb); + ieee80211_xmit(sdata, NULL, skb, txdata_flags); local_bh_enable(); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5e2e511c4a6f..3e68132a41fa 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1095,6 +1095,21 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, if (elen >= sizeof(*elems->max_idle_period_ie)) elems->max_idle_period_ie = (void *)pos; break; + case WLAN_EID_EXTENSION: + if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA && + elen >= (sizeof(*elems->mu_edca_param_set) + 1)) { + elems->mu_edca_param_set = (void *)&pos[1]; + } else if (pos[0] == WLAN_EID_EXT_HE_CAPABILITY) { + elems->he_cap = (void *)&pos[1]; + elems->he_cap_len = elen - 1; + } else if (pos[0] == WLAN_EID_EXT_HE_OPERATION && + elen >= sizeof(*elems->he_operation) && + elen >= ieee80211_he_oper_size(&pos[1])) { + elems->he_operation = (void *)&pos[1]; + } else if (pos[0] == WLAN_EID_EXT_UORA && elen >= 1) { + elems->uora_element = (void *)&pos[1]; + } + break; default: break; } @@ -1353,9 +1368,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, enum nl80211_band band, u32 rate_mask, struct cfg80211_chan_def *chandef, - size_t *offset) + size_t *offset, u32 flags) { struct ieee80211_supported_band *sband; + const struct ieee80211_sta_he_cap *he_cap; u8 *pos = buffer, *end = buffer + buffer_len; size_t noffset; int supp_rates_len, i; @@ -1433,6 +1449,9 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, chandef->chan->center_freq); } + if (flags & IEEE80211_PROBE_FLAG_MIN_CONTENT) + goto done; + /* insert custom IEs that go before HT */ if (ie && ie_len) { static const u8 before_ht[] = { @@ -1460,11 +1479,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, sband->ht_cap.cap); } - /* - * If adding more here, adjust code in main.c - * that calculates local->scan_ies_len. - */ - /* insert custom IEs that go before VHT */ if (ie && ie_len) { static const u8 before_vht[] = { @@ -1507,9 +1521,43 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, sband->vht_cap.cap); } + /* insert custom IEs that go before HE */ + if (ie && ie_len) { + static const u8 before_he[] = { + /* + * no need to list the ones split off before VHT + * or generated here + */ + WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_REQ_PARAMS, + WLAN_EID_AP_CSN, + /* TODO: add 11ah/11aj/11ak elements */ + }; + noffset = ieee80211_ie_split(ie, ie_len, + before_he, ARRAY_SIZE(before_he), + *offset); + if (end - pos < noffset - *offset) + goto out_err; + memcpy(pos, ie + *offset, noffset - *offset); + pos += noffset - *offset; + *offset = noffset; + } + + he_cap = ieee80211_get_he_sta_cap(sband); + if (he_cap) { + pos = ieee80211_ie_build_he_cap(pos, he_cap, end); + if (!pos) + goto out_err; + } + + /* + * If adding more here, adjust code in main.c + * that calculates local->scan_ies_len. + */ + return pos - buffer; out_err: WARN_ONCE(1, "not enough space for preq IEs\n"); + done: return pos - buffer; } @@ -1518,7 +1566,8 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, struct ieee80211_scan_ies *ie_desc, const u8 *ie, size_t ie_len, u8 bands_used, u32 *rate_masks, - struct cfg80211_chan_def *chandef) + struct cfg80211_chan_def *chandef, + u32 flags) { size_t pos = 0, old_pos = 0, custom_ie_offset = 0; int i; @@ -1533,7 +1582,8 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, ie, ie_len, i, rate_masks[i], chandef, - &custom_ie_offset); + &custom_ie_offset, + flags); ie_desc->ies[i] = buffer + old_pos; ie_desc->len[i] = pos - old_pos; old_pos = pos; @@ -1561,7 +1611,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - bool directed) + u32 flags) { struct ieee80211_local *local = sdata->local; struct cfg80211_chan_def chandef; @@ -1577,7 +1627,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, * badly-behaved APs don't respond when this parameter is included. */ chandef.width = sdata->vif.bss_conf.chandef.width; - if (directed) + if (flags & IEEE80211_PROBE_FLAG_DIRECTED) chandef.chan = NULL; else chandef.chan = chan; @@ -1591,7 +1641,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb), skb_tailroom(skb), &dummy_ie_desc, ie, ie_len, BIT(chan->band), - rate_masks, &chandef); + rate_masks, &chandef, flags); skb_put(skb, ies_len); if (dst) { @@ -1605,27 +1655,6 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, return skb; } -void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, - const u8 *src, const u8 *dst, - const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, u32 tx_flags, - struct ieee80211_channel *channel, bool scan) -{ - struct sk_buff *skb; - - skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel, - ssid, ssid_len, - ie, ie_len, directed); - if (skb) { - IEEE80211_SKB_CB(skb)->flags |= tx_flags; - if (scan) - ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); - else - ieee80211_tx_skb(sdata, skb); - } -} - u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band band, u32 *basic_rates) @@ -2412,6 +2441,72 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos; } +u8 *ieee80211_ie_build_he_cap(u8 *pos, + const struct ieee80211_sta_he_cap *he_cap, + u8 *end) +{ + u8 n; + u8 ie_len; + u8 *orig_pos = pos; + + /* Make sure we have place for the IE */ + /* + * TODO: the 1 added is because this temporarily is under the EXTENSION + * IE. Get rid of it when it moves. + */ + if (!he_cap) + return orig_pos; + + n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem); + ie_len = 2 + 1 + + sizeof(he_cap->he_cap_elem) + n + + ieee80211_he_ppe_size(he_cap->ppe_thres[0], + he_cap->he_cap_elem.phy_cap_info); + + if ((end - pos) < ie_len) + return orig_pos; + + *pos++ = WLAN_EID_EXTENSION; + pos++; /* We'll set the size later below */ + *pos++ = WLAN_EID_EXT_HE_CAPABILITY; + + /* Fixed data */ + memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem)); + pos += sizeof(he_cap->he_cap_elem); + + memcpy(pos, &he_cap->he_mcs_nss_supp, n); + pos += n; + + /* Check if PPE Threshold should be present */ + if ((he_cap->he_cap_elem.phy_cap_info[6] & + IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0) + goto end; + + /* + * Calculate how many PPET16/PPET8 pairs are to come. Algorithm: + * (NSS_M1 + 1) x (num of 1 bits in RU_INDEX_BITMASK) + */ + n = hweight8(he_cap->ppe_thres[0] & + IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK); + n *= (1 + ((he_cap->ppe_thres[0] & IEEE80211_PPE_THRES_NSS_MASK) >> + IEEE80211_PPE_THRES_NSS_POS)); + + /* + * Each pair is 6 bits, and we need to add the 7 "header" bits to the + * total size. + */ + n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7; + n = DIV_ROUND_UP(n, 8); + + /* Copy PPE Thresholds */ + memcpy(pos, &he_cap->ppe_thres, n); + pos += n; + +end: + orig_pos[1] = (pos - orig_pos) - 2; + return pos; +} + u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, const struct cfg80211_chan_def *chandef, u16 prot_mode, bool rifs_mode) diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index a1086bdec242..5423b197d98a 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -32,7 +32,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, __be32 mask = 0; /* we're only interested in locally generated packets */ - if (skb->sk == NULL) + if (skb->sk == NULL || !net_eq(nf_ct_net(ct), sock_net(skb->sk))) goto out; if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) goto out; diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index dc61399e30be..a8c5c846aec1 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -132,9 +132,10 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, } EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header); -void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk) +void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m, + struct sock *sk) { - if (!sk || !sk_fullsock(sk)) + if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk))) return; read_lock_bh(&sk->sk_callback_lock); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 46f9df99d276..86df2a1666fd 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -108,6 +108,7 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) struct flowi fl; unsigned int hh_len; struct dst_entry *dst; + struct sock *sk = skb->sk; int err; err = xfrm_decode_session(skb, &fl, family); @@ -119,7 +120,10 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); + if (sk && !net_eq(net, sock_net(sk))) + sk = NULL; + + dst = xfrm_lookup(net, dst, &fl, sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 896d4a36081d..3f211e1025c1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -14,6 +14,7 @@ #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/vmalloc.h> +#include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 1105a23bda5e..2b94dcc43456 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -107,7 +107,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr, break; case NFT_META_SKUID: sk = skb_to_full_sk(skb); - if (!sk || !sk_fullsock(sk)) + if (!sk || !sk_fullsock(sk) || + !net_eq(nft_net(pkt), sock_net(sk))) goto err; read_lock_bh(&sk->sk_callback_lock); @@ -123,7 +124,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr, break; case NFT_META_SKGID: sk = skb_to_full_sk(skb); - if (!sk || !sk_fullsock(sk)) + if (!sk || !sk_fullsock(sk) || + !net_eq(nft_net(pkt), sock_net(sk))) goto err; read_lock_bh(&sk->sk_callback_lock); @@ -214,7 +216,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr, #ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: sk = skb_to_full_sk(skb); - if (!sk || !sk_fullsock(sk)) + if (!sk || !sk_fullsock(sk) || + !net_eq(nft_net(pkt), sock_net(sk))) goto err; *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 74e1b3bd6954..998c2b546f6d 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -23,6 +23,9 @@ static void nft_socket_eval(const struct nft_expr *expr, struct sock *sk = skb->sk; u32 *dest = ®s->data[priv->dreg]; + if (sk && !net_eq(nft_net(pkt), sock_net(sk))) + sk = NULL; + if (!sk) switch(nft_pf(pkt)) { case NFPROTO_IPV4: @@ -39,7 +42,7 @@ static void nft_socket_eval(const struct nft_expr *expr, return; } - if(!sk) { + if (!sk) { nft_reg_store8(dest, 0); return; } diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 7df2dece57d3..5d92e1781980 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -72,8 +72,9 @@ static bool cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_cgroup_info_v0 *info = par->matchinfo; + struct sock *sk = skb->sk; - if (skb->sk == NULL || !sk_fullsock(skb->sk)) + if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk))) return false; return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^ @@ -85,8 +86,9 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_cgroup_info_v1 *info = par->matchinfo; struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data; struct cgroup *ancestor = info->priv; + struct sock *sk = skb->sk; - if (!skb->sk || !sk_fullsock(skb->sk)) + if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk))) return false; if (ancestor) diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 3d705c688a27..46686fb73784 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -67,7 +67,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb_to_full_sk(skb); struct net *net = xt_net(par); - if (sk == NULL || sk->sk_socket == NULL) + if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk))) return (info->match ^ info->invert) == 0; else if (info->match & info->invert & XT_OWNER_SOCKET) /* diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 07085c22b19c..f44de4bc2100 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -265,7 +265,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) } /* use TTL as seen before forwarding */ - if (xt_out(par) != NULL && skb->sk == NULL) + if (xt_out(par) != NULL && + (!skb->sk || !net_eq(net, sock_net(skb->sk)))) ttl++; spin_lock_bh(&recent_lock); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 5c0779c4fa3c..0472f3472842 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -56,8 +56,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; + if (!net_eq(xt_net(par), sock_net(sk))) + sk = NULL; + if (!sk) sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par)); + if (sk) { bool wildcard; bool transparent = true; @@ -113,8 +117,12 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; + if (!net_eq(xt_net(par), sock_net(sk))) + sk = NULL; + if (!sk) sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par)); + if (sk) { bool wildcard; bool transparent = true; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 492ab0c36f7c..391c4073a6dc 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2516,7 +2516,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct ovs_tunnel_info *ovs_tun; struct nlattr *a; int err = 0, start, opts_type; + __be16 dst_opt_type; + dst_opt_type = 0; ovs_match_init(&match, &key, true, NULL); opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) @@ -2528,10 +2530,13 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, err = validate_geneve_opts(&key); if (err < 0) return err; + dst_opt_type = TUNNEL_GENEVE_OPT; break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + dst_opt_type = TUNNEL_VXLAN_OPT; break; case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + dst_opt_type = TUNNEL_ERSPAN_OPT; break; } } @@ -2574,7 +2579,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, */ ip_tunnel_info_opts_set(tun_info, TUN_METADATA_OPTS(&key, key.tun_opts_len), - key.tun_opts_len); + key.tun_opts_len, dst_opt_type); add_nested_action_end(*sfa, start); return err; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 8a925c72db5f..55bc96b610e8 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -136,16 +136,18 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; - struct nlattr *pattr; - struct tc_pedit *parm; - int ret = 0, err; - struct tcf_pedit *p; struct tc_pedit_key *keys = NULL; struct tcf_pedit_key_ex *keys_ex; + struct tc_pedit *parm; + struct nlattr *pattr; + struct tcf_pedit *p; + int ret = 0, err; int ksize; - if (nla == NULL) + if (!nla) { + NL_SET_ERR_MSG_MOD(extack, "Pedit requires attributes to be passed"); return -EINVAL; + } err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL); if (err < 0) @@ -154,28 +156,34 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, pattr = tb[TCA_PEDIT_PARMS]; if (!pattr) pattr = tb[TCA_PEDIT_PARMS_EX]; - if (!pattr) + if (!pattr) { + NL_SET_ERR_MSG_MOD(extack, "Missing required TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute"); return -EINVAL; + } parm = nla_data(pattr); ksize = parm->nkeys * sizeof(struct tc_pedit_key); - if (nla_len(pattr) < sizeof(*parm) + ksize) + if (nla_len(pattr) < sizeof(*parm) + ksize) { + NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid"); return -EINVAL; + } keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys); if (IS_ERR(keys_ex)) return PTR_ERR(keys_ex); if (!tcf_idr_check(tn, parm->index, a, bind)) { - if (!parm->nkeys) + if (!parm->nkeys) { + NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed"); return -EINVAL; + } ret = tcf_idr_create(tn, parm->index, est, a, &act_pedit_ops, bind, false); if (ret) return ret; p = to_pedit(*a); keys = kmalloc(ksize, GFP_KERNEL); - if (keys == NULL) { + if (!keys) { tcf_idr_release(*a, bind); kfree(keys_ex); return -ENOMEM; @@ -220,6 +228,7 @@ static void tcf_pedit_cleanup(struct tc_action *a) { struct tcf_pedit *p = to_pedit(a); struct tc_pedit_key *keys = p->tcfp_keys; + kfree(keys); kfree(p->tcfp_keys_ex); } @@ -284,11 +293,12 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, if (p->tcfp_nkeys > 0) { struct tc_pedit_key *tkey = p->tcfp_keys; struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex; - enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + enum pedit_header_type htype = + TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { - u32 *ptr, _data; + u32 *ptr, hdata; int offset = tkey->off; int hoffset; u32 val; @@ -303,39 +313,39 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, rc = pedit_skb_hdr_offset(skb, htype, &hoffset); if (rc) { - pr_info("tc filter pedit bad header type specified (0x%x)\n", + pr_info("tc action pedit bad header type specified (0x%x)\n", htype); goto bad; } if (tkey->offmask) { - char *d, _d; + u8 *d, _d; if (!offset_valid(skb, hoffset + tkey->at)) { - pr_info("tc filter pedit 'at' offset %d out of bounds\n", + pr_info("tc action pedit 'at' offset %d out of bounds\n", hoffset + tkey->at); goto bad; } - d = skb_header_pointer(skb, hoffset + tkey->at, 1, - &_d); + d = skb_header_pointer(skb, hoffset + tkey->at, + sizeof(_d), &_d); if (!d) goto bad; offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { - pr_info("tc filter pedit" - " offset must be on 32 bit boundaries\n"); + pr_info("tc action pedit offset must be on 32 bit boundaries\n"); goto bad; } if (!offset_valid(skb, hoffset + offset)) { - pr_info("tc filter pedit offset %d out of bounds\n", + pr_info("tc action pedit offset %d out of bounds\n", hoffset + offset); goto bad; } - ptr = skb_header_pointer(skb, hoffset + offset, 4, &_data); + ptr = skb_header_pointer(skb, hoffset + offset, + sizeof(hdata), &hdata); if (!ptr) goto bad; /* just do it, baby */ @@ -347,19 +357,20 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, val = (*ptr + tkey->val) & ~tkey->mask; break; default: - pr_info("tc filter pedit bad command (%d)\n", + pr_info("tc action pedit bad command (%d)\n", cmd); goto bad; } *ptr = ((*ptr & tkey->mask) ^ val); - if (ptr == &_data) + if (ptr == &hdata) skb_store_bits(skb, hoffset + offset, ptr, 4); } goto done; - } else + } else { WARN(1, "pedit BUG: index %d\n", p->tcf_index); + } bad: p->tcf_qstats.overlimits++; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 626dac81a48a..ea203e386a92 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> +#include <net/geneve.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -57,6 +58,135 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, return action; } +static const struct nla_policy +enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = { + [TCA_TUNNEL_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = { + [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, + [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, + [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, + .len = 128 }, +}; + +static int +tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1]; + int err, data_len, opt_len; + u8 *data; + + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] || + !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] || + !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data"); + return -EINVAL; + } + + data = nla_data(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]); + data_len = nla_len(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]); + if (data_len < 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long"); + return -ERANGE; + } + if (data_len % 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long"); + return -ERANGE; + } + + opt_len = sizeof(struct geneve_opt) + data_len; + if (dst) { + struct geneve_opt *opt = dst; + + WARN_ON(dst_len < opt_len); + + opt->opt_class = + nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]); + opt->type = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]); + opt->length = data_len / 4; /* length is in units of 4 bytes */ + opt->r1 = 0; + opt->r2 = 0; + opt->r3 = 0; + + memcpy(opt + 1, data, data_len); + } + + return opt_len; +} + +static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, + int dst_len, struct netlink_ext_ack *extack) +{ + int err, rem, opt_len, len = nla_len(nla), opts_len = 0; + const struct nlattr *attr, *head = nla_data(nla); + + err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); + if (err) + return err; + + nla_for_each_attr(attr, head, len, rem) { + switch (nla_type(attr)) { + case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: + opt_len = tunnel_key_copy_geneve_opt(attr, dst, + dst_len, extack); + if (opt_len < 0) + return opt_len; + opts_len += opt_len; + if (dst) { + dst_len -= opt_len; + dst += opt_len; + } + break; + } + } + + if (!opts_len) { + NL_SET_ERR_MSG(extack, "Empty list of tunnel options"); + return -EINVAL; + } + + if (rem > 0) { + NL_SET_ERR_MSG(extack, "Trailing data after parsing tunnel key options attributes"); + return -EINVAL; + } + + return opts_len; +} + +static int tunnel_key_get_opts_len(struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + return tunnel_key_copy_opts(nla, NULL, 0, extack); +} + +static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, + int opts_len, struct netlink_ext_ack *extack) +{ + info->options_len = opts_len; + switch (nla_type(nla_data(nla))) { + case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: +#if IS_ENABLED(CONFIG_INET) + info->key.tun_flags |= TUNNEL_GENEVE_OPT; + return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), + opts_len, extack); +#else + return -EAFNOSUPPORT; +#endif + default: + NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type"); + return -EINVAL; + } +} + static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) }, [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, @@ -66,6 +196,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16}, [TCA_TUNNEL_KEY_NO_CSUM] = { .type = NLA_U8 }, + [TCA_TUNNEL_KEY_ENC_OPTS] = { .type = NLA_NESTED }, }; static int tunnel_key_init(struct net *net, struct nlattr *nla, @@ -81,21 +212,28 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct tcf_tunnel_key *t; bool exists = false; __be16 dst_port = 0; + int opts_len = 0; __be64 key_id; __be16 flags; int ret = 0; int err; - if (!nla) + if (!nla) { + NL_SET_ERR_MSG(extack, "Tunnel requires attributes to be passed"); return -EINVAL; + } err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy, - NULL); - if (err < 0) + extack); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes"); return err; + } - if (!tb[TCA_TUNNEL_KEY_PARMS]) + if (!tb[TCA_TUNNEL_KEY_PARMS]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key parameters"); return -EINVAL; + } parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); exists = tcf_idr_check(tn, parm->index, a, bind); @@ -107,6 +245,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, break; case TCA_TUNNEL_KEY_ACT_SET: if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key id"); ret = -EINVAL; goto err_out; } @@ -121,6 +260,15 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); + if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) { + opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS], + extack); + if (opts_len < 0) { + ret = opts_len; + goto err_out; + } + } + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { __be32 saddr; @@ -131,7 +279,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, dst_port, flags, - key_id, 0); + key_id, opts_len); } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { struct in6_addr saddr; @@ -143,16 +291,30 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port, 0, flags, key_id, 0); + } else { + NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst"); + ret = -EINVAL; + goto err_out; } if (!metadata) { - ret = -EINVAL; + NL_SET_ERR_MSG(extack, "Cannot allocate tunnel metadata dst"); + ret = -ENOMEM; goto err_out; } + if (opts_len) { + ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS], + &metadata->u.tun_info, + opts_len, extack); + if (ret < 0) + goto err_out; + } + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; break; default: + NL_SET_ERR_MSG(extack, "Unknown tunnel key action"); ret = -EINVAL; goto err_out; } @@ -160,14 +322,18 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, parm->index, est, a, &act_tunnel_key_ops, bind, true); - if (ret) + if (ret) { + NL_SET_ERR_MSG(extack, "Cannot create TC IDR"); return ret; + } ret = ACT_P_CREATED; } else { tcf_idr_release(*a, bind); - if (!ovr) + if (!ovr) { + NL_SET_ERR_MSG(extack, "TC IDR already exists"); return -EEXIST; + } } t = to_tunnel_key(*a); @@ -177,6 +343,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, if (unlikely(!params_new)) { if (ret == ACT_P_CREATED) tcf_idr_release(*a, bind); + NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); return -ENOMEM; } @@ -216,6 +383,61 @@ static void tunnel_key_release(struct tc_action *a) } } +static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + int len = info->options_len; + u8 *src = (u8 *)(info + 1); + struct nlattr *start; + + start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); + if (!start) + return -EMSGSIZE; + + while (len > 0) { + struct geneve_opt *opt = (struct geneve_opt *)src; + + if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS, + opt->opt_class) || + nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE, + opt->type) || + nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA, + opt->length * 4, opt + 1)) + return -EMSGSIZE; + + len -= sizeof(struct geneve_opt) + opt->length * 4; + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + nla_nest_end(skb, start); + return 0; +} + +static int tunnel_key_opts_dump(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + struct nlattr *start; + int err; + + if (!info->options_len) + return 0; + + start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS); + if (!start) + return -EMSGSIZE; + + if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { + err = tunnel_key_geneve_opts_dump(skb, info); + if (err) + return err; + } else { + return -EINVAL; + } + + nla_nest_end(skb, start); + return 0; +} + static int tunnel_key_dump_addresses(struct sk_buff *skb, const struct ip_tunnel_info *info) { @@ -266,8 +488,9 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, goto nla_put_failure; if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { - struct ip_tunnel_key *key = - ¶ms->tcft_enc_metadata->u.tun_info.key; + struct ip_tunnel_info *info = + ¶ms->tcft_enc_metadata->u.tun_info; + struct ip_tunnel_key *key = &info->key; __be32 key_id = tunnel_id_to_key32(key->tun_id); if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || @@ -275,7 +498,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, ¶ms->tcft_enc_metadata->u.tun_info) || nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) || nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM, - !(key->tun_flags & TUNNEL_CSUM))) + !(key->tun_flags & TUNNEL_CSUM)) || + tunnel_key_opts_dump(skb, info)) goto nla_put_failure; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index cdc3c87c53e6..bbf8dda96b0e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -277,18 +277,21 @@ static bool tcf_block_offload_in_use(struct tcf_block *block) static int tcf_block_offload_cmd(struct tcf_block *block, struct net_device *dev, struct tcf_block_ext_info *ei, - enum tc_block_command command) + enum tc_block_command command, + struct netlink_ext_ack *extack) { struct tc_block_offload bo = {}; bo.command = command; bo.binder_type = ei->binder_type; bo.block = block; + bo.extack = extack; return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); } static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, - struct tcf_block_ext_info *ei) + struct tcf_block_ext_info *ei, + struct netlink_ext_ack *extack) { struct net_device *dev = q->dev_queue->dev; int err; @@ -299,10 +302,12 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, /* If tc offload feature is disabled and the block we try to bind * to already has some offloaded filters, forbid to bind. */ - if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) + if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) { + NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); return -EOPNOTSUPP; + } - err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND); + err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; return err; @@ -322,7 +327,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_dec; - err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND); + err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL); if (err == -EOPNOTSUPP) goto no_offload_dev_dec; return; @@ -612,7 +617,7 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, if (err) goto err_chain_head_change_cb_add; - err = tcf_block_offload_bind(block, q, ei); + err = tcf_block_offload_bind(block, q, ei, extack); if (err) goto err_block_offload_bind; @@ -746,18 +751,53 @@ unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb) } EXPORT_SYMBOL(tcf_block_cb_decref); +static int +tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, + void *cb_priv, bool add, bool offload_in_use, + struct netlink_ext_ack *extack) +{ + struct tcf_chain *chain; + struct tcf_proto *tp; + int err; + + list_for_each_entry(chain, &block->chain_list, list) { + for (tp = rtnl_dereference(chain->filter_chain); tp; + tp = rtnl_dereference(tp->next)) { + if (tp->ops->reoffload) { + err = tp->ops->reoffload(tp, add, cb, cb_priv, + extack); + if (err && add) + goto err_playback_remove; + } else if (add && offload_in_use) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support"); + goto err_playback_remove; + } + } + } + + return 0; + +err_playback_remove: + tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, + extack); + return err; +} + struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_ident, - void *cb_priv) + void *cb_priv, + struct netlink_ext_ack *extack) { struct tcf_block_cb *block_cb; + int err; - /* At this point, playback of previous block cb calls is not supported, - * so forbid to register to block which already has some offloaded - * filters present. - */ - if (tcf_block_offload_in_use(block)) - return ERR_PTR(-EOPNOTSUPP); + /* Replay any already present rules */ + err = tcf_block_playback_offloads(block, cb, cb_priv, true, + tcf_block_offload_in_use(block), + extack); + if (err) + return ERR_PTR(err); block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL); if (!block_cb) @@ -772,17 +812,22 @@ EXPORT_SYMBOL(__tcf_block_cb_register); int tcf_block_cb_register(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_ident, - void *cb_priv) + void *cb_priv, struct netlink_ext_ack *extack) { struct tcf_block_cb *block_cb; - block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv); + block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv, + extack); return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0; } EXPORT_SYMBOL(tcf_block_cb_register); -void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb) +void __tcf_block_cb_unregister(struct tcf_block *block, + struct tcf_block_cb *block_cb) { + tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, + false, tcf_block_offload_in_use(block), + NULL); list_del(&block_cb->list); kfree(block_cb); } @@ -796,7 +841,7 @@ void tcf_block_cb_unregister(struct tcf_block *block, block_cb = tcf_block_cb_lookup(block, cb, cb_ident); if (!block_cb) return; - __tcf_block_cb_unregister(block_cb); + __tcf_block_cb_unregister(block, block_cb); } EXPORT_SYMBOL(tcf_block_cb_unregister); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1aa7f6511065..66e0ac9811f9 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -43,6 +43,7 @@ struct cls_bpf_prog { struct tcf_result res; bool exts_integrated; u32 gen_flags; + unsigned int in_hw_count; struct tcf_exts exts; u32 handle; u16 bpf_num_ops; @@ -174,6 +175,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf_offload_cmd(tp, oldprog, prog, extack); return err; } else if (err > 0) { + prog->in_hw_count = err; tcf_block_offload_inc(block, &prog->gen_flags); } } @@ -652,6 +654,42 @@ skip: } } +static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, + void *cb_priv, struct netlink_ext_ack *extack) +{ + struct cls_bpf_head *head = rtnl_dereference(tp->root); + struct tcf_block *block = tp->chain->block; + struct tc_cls_bpf_offload cls_bpf = {}; + struct cls_bpf_prog *prog; + int err; + + list_for_each_entry(prog, &head->plist, link) { + if (tc_skip_hw(prog->gen_flags)) + continue; + + tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, + extack); + cls_bpf.command = TC_CLSBPF_OFFLOAD; + cls_bpf.exts = &prog->exts; + cls_bpf.prog = add ? prog->filter : NULL; + cls_bpf.oldprog = add ? NULL : prog->filter; + cls_bpf.name = prog->bpf_name; + cls_bpf.exts_integrated = prog->exts_integrated; + + err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv); + if (err) { + if (add && tc_skip_sw(prog->gen_flags)) + return err; + continue; + } + + tc_cls_offload_cnt_update(block, &prog->in_hw_count, + &prog->gen_flags, add); + } + + return 0; +} + static struct tcf_proto_ops cls_bpf_ops __read_mostly = { .kind = "bpf", .owner = THIS_MODULE, @@ -662,6 +700,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = { .change = cls_bpf_change, .delete = cls_bpf_delete, .walk = cls_bpf_walk, + .reoffload = cls_bpf_reoffload, .dump = cls_bpf_dump, .bind_class = cls_bpf_bind_class, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9e8b26a80fb3..352876bb901b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -87,6 +87,7 @@ struct cls_fl_filter { struct list_head list; u32 handle; u32 flags; + unsigned int in_hw_count; struct rcu_work rwork; struct net_device *hw_dev; }; @@ -289,6 +290,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, fl_hw_destroy_filter(tp, f, NULL); return err; } else if (err > 0) { + f->in_hw_count = err; tcf_block_offload_inc(block, &f->flags); } @@ -1087,6 +1089,47 @@ skip: } } +static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, + void *cb_priv, struct netlink_ext_ack *extack) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = tp->chain->block; + struct fl_flow_mask *mask; + struct cls_fl_filter *f; + int err; + + list_for_each_entry(mask, &head->masks, list) { + list_for_each_entry(f, &mask->filters, list) { + if (tc_skip_hw(f->flags)) + continue; + + tc_cls_common_offload_init(&cls_flower.common, tp, + f->flags, extack); + cls_flower.command = add ? + TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; + cls_flower.cookie = (unsigned long)f; + cls_flower.dissector = &mask->dissector; + cls_flower.mask = &f->mkey; + cls_flower.key = &f->key; + cls_flower.exts = &f->exts; + cls_flower.classid = f->res.classid; + + err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + if (err) { + if (add && tc_skip_sw(f->flags)) + return err; + continue; + } + + tc_cls_offload_cnt_update(block, &f->in_hw_count, + &f->flags, add); + } + } + + return 0; +} + static int fl_dump_key_val(struct sk_buff *skb, void *val, int val_type, void *mask, int mask_type, int len) @@ -1438,6 +1481,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .change = fl_change, .delete = fl_delete, .walk = fl_walk, + .reoffload = fl_reoffload, .dump = fl_dump, .bind_class = fl_bind_class, .owner = THIS_MODULE, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 47b207ef7762..af16f36ed578 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -21,6 +21,7 @@ struct cls_mall_head { struct tcf_result res; u32 handle; u32 flags; + unsigned int in_hw_count; struct rcu_work rwork; }; @@ -95,6 +96,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, mall_destroy_hw_filter(tp, head, cookie, NULL); return err; } else if (err > 0) { + head->in_hw_count = err; tcf_block_offload_inc(block, &head->flags); } @@ -235,6 +237,35 @@ skip: arg->count++; } +static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, + void *cb_priv, struct netlink_ext_ack *extack) +{ + struct cls_mall_head *head = rtnl_dereference(tp->root); + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + int err; + + if (tc_skip_hw(head->flags)) + return 0; + + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); + cls_mall.command = add ? + TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; + cls_mall.exts = &head->exts; + cls_mall.cookie = (unsigned long)head; + + err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + if (err) { + if (add && tc_skip_sw(head->flags)) + return err; + return 0; + } + + tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add); + + return 0; +} + static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { @@ -289,6 +320,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = { .change = mall_change, .delete = mall_delete, .walk = mall_walk, + .reoffload = mall_reoffload, .dump = mall_dump, .bind_class = mall_bind_class, .owner = THIS_MODULE, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index fb861f90fde6..d5d2a6dc3921 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -62,6 +62,7 @@ struct tc_u_knode { struct tc_u32_pcnt __percpu *pf; #endif u32 flags; + unsigned int in_hw_count; #ifdef CONFIG_CLS_U32_MARK u32 val; u32 mask; @@ -571,6 +572,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32_remove_hw_knode(tp, n, NULL); return err; } else if (err > 0) { + n->in_hw_count = err; tcf_block_offload_inc(block, &n->flags); } @@ -1199,6 +1201,114 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } +static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, + bool add, tc_setup_cb_t *cb, void *cb_priv, + struct netlink_ext_ack *extack) +{ + struct tc_cls_u32_offload cls_u32 = {}; + int err; + + tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack); + cls_u32.command = add ? TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE; + cls_u32.hnode.divisor = ht->divisor; + cls_u32.hnode.handle = ht->handle; + cls_u32.hnode.prio = ht->prio; + + err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); + if (err && add && tc_skip_sw(ht->flags)) + return err; + + return 0; +} + +static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, + bool add, tc_setup_cb_t *cb, void *cb_priv, + struct netlink_ext_ack *extack) +{ + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + int err; + + tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); + cls_u32.command = add ? + TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE; + cls_u32.knode.handle = n->handle; + + if (add) { + cls_u32.knode.fshift = n->fshift; +#ifdef CONFIG_CLS_U32_MARK + cls_u32.knode.val = n->val; + cls_u32.knode.mask = n->mask; +#else + cls_u32.knode.val = 0; + cls_u32.knode.mask = 0; +#endif + cls_u32.knode.sel = &n->sel; + cls_u32.knode.exts = &n->exts; + if (n->ht_down) + cls_u32.knode.link_handle = ht->handle; + } + + err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); + if (err) { + if (add && tc_skip_sw(n->flags)) + return err; + return 0; + } + + tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add); + + return 0; +} + +static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, + void *cb_priv, struct netlink_ext_ack *extack) +{ + struct tc_u_common *tp_c = tp->data; + struct tc_u_hnode *ht; + struct tc_u_knode *n; + unsigned int h; + int err; + + for (ht = rtnl_dereference(tp_c->hlist); + ht; + ht = rtnl_dereference(ht->next)) { + if (ht->prio != tp->prio) + continue; + + /* When adding filters to a new dev, try to offload the + * hashtable first. When removing, do the filters before the + * hashtable. + */ + if (add && !tc_skip_hw(ht->flags)) { + err = u32_reoffload_hnode(tp, ht, add, cb, cb_priv, + extack); + if (err) + return err; + } + + for (h = 0; h <= ht->divisor; h++) { + for (n = rtnl_dereference(ht->ht[h]); + n; + n = rtnl_dereference(n->next)) { + if (tc_skip_hw(n->flags)) + continue; + + err = u32_reoffload_knode(tp, n, add, cb, + cb_priv, extack); + if (err) + return err; + } + } + + if (!add && !tc_skip_hw(ht->flags)) + u32_reoffload_hnode(tp, ht, add, cb, cb_priv, extack); + } + + return 0; +} + static void u32_bind_class(void *fh, u32 classid, unsigned long cl) { struct tc_u_knode *n = fh; @@ -1336,6 +1446,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { .change = u32_change, .delete = u32_delete, .walk = u32_walk, + .reoffload = u32_reoffload, .dump = u32_dump, .bind_class = u32_bind_class, .owner = THIS_MODULE, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 2a4ab7caf553..43c4bfe625a9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -126,7 +126,6 @@ struct htb_class { union { struct htb_class_leaf { - struct list_head drop_list; int deficit[TC_HTB_MAXDEPTH]; struct Qdisc *q; } leaf; @@ -171,7 +170,6 @@ struct htb_sched { struct qdisc_watchdog watchdog; s64 now; /* cached dequeue time */ - struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ /* time of nearest event per level (row) */ s64 near_ev_cache[TC_HTB_MAXDEPTH]; @@ -562,8 +560,6 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) if (!cl->prio_activity) { cl->prio_activity = 1 << cl->prio; htb_activate_prios(q, cl); - list_add_tail(&cl->un.leaf.drop_list, - q->drops + cl->prio); } } @@ -579,7 +575,6 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) htb_deactivate_prios(q, cl); cl->prio_activity = 0; - list_del_init(&cl->un.leaf.drop_list); } static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, @@ -981,7 +976,6 @@ static void htb_reset(struct Qdisc *sch) else { if (cl->un.leaf.q) qdisc_reset(cl->un.leaf.q); - INIT_LIST_HEAD(&cl->un.leaf.drop_list); } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; @@ -993,8 +987,6 @@ static void htb_reset(struct Qdisc *sch) sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); - for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops + i); } static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { @@ -1024,7 +1016,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; int err; - int i; qdisc_watchdog_init(&q->watchdog, sch); INIT_WORK(&q->work, htb_work_func); @@ -1050,8 +1041,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; - for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops + i); qdisc_skb_head_init(&q->direct_queue); @@ -1224,7 +1213,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->level = 0; memset(&parent->un.inner, 0, sizeof(parent->un.inner)); - INIT_LIST_HEAD(&parent->un.leaf.drop_list); parent->un.leaf.q = new_q ? new_q : &noop_qdisc; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; @@ -1418,7 +1406,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } cl->children = 0; - INIT_LIST_HEAD(&cl->un.leaf.drop_list); RB_CLEAR_NODE(&cl->pq_node); for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 7d6801fc5340..ad18a2052416 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -68,6 +68,11 @@ Fabio Ludovici <fabio.ludovici at yahoo.it> */ +struct disttable { + u32 size; + s16 table[0]; +}; + struct netem_sched_data { /* internal t(ime)fifo qdisc uses t_root and sch->limit */ struct rb_root t_root; @@ -99,10 +104,7 @@ struct netem_sched_data { u32 rho; } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; - struct disttable { - u32 size; - s16 table[0]; - } *delay_dist; + struct disttable *delay_dist; enum { CLG_RANDOM, @@ -142,6 +144,7 @@ struct netem_sched_data { s32 bytes_left; } slot; + struct disttable *slot_dist; }; /* Time stamp put into socket buffer control block @@ -180,7 +183,7 @@ static u32 get_crandom(struct crndstate *state) u64 value, rho; unsigned long answer; - if (state->rho == 0) /* no correlation */ + if (!state || state->rho == 0) /* no correlation */ return prandom_u32(); value = prandom_u32(); @@ -601,10 +604,19 @@ finish_segs: static void get_slot_next(struct netem_sched_data *q, u64 now) { - q->slot.slot_next = now + q->slot_config.min_delay + - (prandom_u32() * - (q->slot_config.max_delay - - q->slot_config.min_delay) >> 32); + s64 next_delay; + + if (!q->slot_dist) + next_delay = q->slot_config.min_delay + + (prandom_u32() * + (q->slot_config.max_delay - + q->slot_config.min_delay) >> 32); + else + next_delay = tabledist(q->slot_config.dist_delay, + (s32)(q->slot_config.dist_jitter), + NULL, q->slot_dist); + + q->slot.slot_next = now + next_delay; q->slot.packets_left = q->slot_config.max_packets; q->slot.bytes_left = q->slot_config.max_bytes; } @@ -721,9 +733,9 @@ static void dist_free(struct disttable *d) * signed 16 bit values. */ -static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) +static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, + const struct nlattr *attr) { - struct netem_sched_data *q = qdisc_priv(sch); size_t n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); spinlock_t *root_lock; @@ -744,7 +756,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); - swap(q->delay_dist, d); + swap(*tbl, d); spin_unlock_bh(root_lock); dist_free(d); @@ -762,7 +774,8 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) q->slot_config.max_bytes = INT_MAX; q->slot.packets_left = q->slot_config.max_packets; q->slot.bytes_left = q->slot_config.max_bytes; - if (q->slot_config.min_delay | q->slot_config.max_delay) + if (q->slot_config.min_delay | q->slot_config.max_delay | + q->slot_config.dist_jitter) q->slot.slot_next = ktime_get_ns(); else q->slot.slot_next = 0; @@ -926,16 +939,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, } if (tb[TCA_NETEM_DELAY_DIST]) { - ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); - if (ret) { - /* recover clg and loss_model, in case of - * q->clg and q->loss_model were modified - * in get_loss_clg() - */ - q->clg = old_clg; - q->loss_model = old_loss_model; - return ret; - } + ret = get_dist_table(sch, &q->delay_dist, + tb[TCA_NETEM_DELAY_DIST]); + if (ret) + goto get_table_failure; + } + + if (tb[TCA_NETEM_SLOT_DIST]) { + ret = get_dist_table(sch, &q->slot_dist, + tb[TCA_NETEM_SLOT_DIST]); + if (ret) + goto get_table_failure; } sch->limit = qopt->limit; @@ -983,6 +997,15 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, get_slot(q, tb[TCA_NETEM_SLOT]); return ret; + +get_table_failure: + /* recover clg and loss_model, in case of + * q->clg and q->loss_model were modified + * in get_loss_clg() + */ + q->clg = old_clg; + q->loss_model = old_loss_model; + return ret; } static int netem_init(struct Qdisc *sch, struct nlattr *opt, @@ -1011,6 +1034,7 @@ static void netem_destroy(struct Qdisc *sch) if (q->qdisc) qdisc_destroy(q->qdisc); dist_free(q->delay_dist); + dist_free(q->slot_dist); } static int dump_loss_model(const struct netem_sched_data *q, @@ -1127,7 +1151,8 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (dump_loss_model(q, skb) != 0) goto nla_put_failure; - if (q->slot_config.min_delay | q->slot_config.max_delay) { + if (q->slot_config.min_delay | q->slot_config.max_delay | + q->slot_config.dist_jitter) { slot = q->slot_config; if (slot.max_packets == INT_MAX) slot.max_packets = 0; diff --git a/net/sctp/input.c b/net/sctp/input.c index ba8a6e6c36fa..9bbc5f92c941 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -56,6 +56,7 @@ #include <net/sctp/sm.h> #include <net/sctp/checksum.h> #include <net/net_namespace.h> +#include <linux/rhashtable.h> /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ce620e878538..0e4c8332771a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -66,6 +66,7 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/compat.h> +#include <linux/rhashtable.h> #include <net/ip.h> #include <net/icmp.h> @@ -4169,6 +4170,28 @@ out: return retval; } +static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + int val; + + if (!sctp_style(sk, TCP)) + return -EOPNOTSUPP; + + if (sctp_sk(sk)->ep->base.bind_addr.port) + return -EFAULT; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + sctp_sk(sk)->reuse = !!val; + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4363,6 +4386,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_interleaving_supported(sk, optval, optlen); break; + case SCTP_REUSE_PORT: + retval = sctp_setsockopt_reuse_port(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -7196,6 +7222,26 @@ out: return retval; } +static int sctp_getsockopt_reuse_port(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + int val; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + val = sctp_sk(sk)->reuse; + if (put_user(len, optlen)) + return -EFAULT; + + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -7391,6 +7437,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_interleaving_supported(sk, len, optval, optlen); break; + case SCTP_REUSE_PORT: + retval = sctp_getsockopt_reuse_port(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -7428,6 +7477,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { + bool reuse = (sk->sk_reuse || sctp_sk(sk)->reuse); struct sctp_bind_hashbucket *head; /* hash list */ struct sctp_bind_bucket *pp; unsigned short snum; @@ -7500,13 +7550,11 @@ pp_found: * used by other socket (pp->owner not empty); that other * socket is going to be sk2. */ - int reuse = sk->sk_reuse; struct sock *sk2; pr_debug("%s: found a possible match\n", __func__); - if (pp->fastreuse && sk->sk_reuse && - sk->sk_state != SCTP_SS_LISTENING) + if (pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING) goto success; /* Run through the list of sockets bound to the port @@ -7524,7 +7572,7 @@ pp_found: ep2 = sctp_sk(sk2)->ep; if (sk == sk2 || - (reuse && sk2->sk_reuse && + (reuse && (sk2->sk_reuse || sctp_sk(sk2)->reuse) && sk2->sk_state != SCTP_SS_LISTENING)) continue; @@ -7548,12 +7596,12 @@ pp_not_found: * SO_REUSEADDR on this socket -sk-). */ if (hlist_empty(&pp->owner)) { - if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING) + if (reuse && sk->sk_state != SCTP_SS_LISTENING) pp->fastreuse = 1; else pp->fastreuse = 0; } else if (pp->fastreuse && - (!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING)) + (!reuse || sk->sk_state == SCTP_SS_LISTENING)) pp->fastreuse = 0; /* We are set, so fill up all the data in the hash table @@ -7684,7 +7732,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) err = 0; sctp_unhash_endpoint(ep); sk->sk_state = SCTP_SS_CLOSED; - if (sk->sk_reuse) + if (sk->sk_reuse || sctp_sk(sk)->reuse) sctp_sk(sk)->bind_hash->fastreuse = 1; goto out; } @@ -8551,6 +8599,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_no_check_tx = sk->sk_no_check_tx; newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; + sctp_sk(newsk)->reuse = sp->reuse; newsk->sk_shutdown = sk->sk_shutdown; newsk->sk_destruct = sctp_destruct_sock; diff --git a/net/smc/Makefile b/net/smc/Makefile index 188104654b54..4df96b4b8130 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o -smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o +smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e017b6a4452b..f3fdf3714f8b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -23,6 +23,7 @@ #include <linux/workqueue.h> #include <linux/in.h> #include <linux/sched/signal.h> +#include <linux/if_vlan.h> #include <net/sock.h> #include <net/tcp.h> @@ -35,6 +36,7 @@ #include "smc_cdc.h" #include "smc_core.h" #include "smc_ib.h" +#include "smc_ism.h" #include "smc_pnet.h" #include "smc_tx.h" #include "smc_rx.h" @@ -380,8 +382,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) return 0; } -static void smc_conn_save_peer_info(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm *clc) +static void smcr_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) { int bufsize = smc_uncompress_bufsize(clc->rmbe_size); @@ -392,6 +394,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc, smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); } +static void smcd_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) +{ + int bufsize = smc_uncompress_bufsize(clc->dmbe_size); + + smc->conn.peer_rmbe_idx = clc->dmbe_idx; + smc->conn.peer_token = clc->token; + /* msg header takes up space in the buffer */ + smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); + atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); + smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; +} + +static void smc_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) +{ + if (smc->conn.lgr->is_smcd) + smcd_conn_save_peer_info(smc, clc); + else + smcr_conn_save_peer_info(smc, clc); +} + static void smc_link_save_peer_info(struct smc_link *link, struct smc_clc_msg_accept_confirm *clc) { @@ -458,15 +482,51 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, return reason_code; } +/* check if there is an ISM device available for this connection. */ +/* called for connect and listen */ +static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev) +{ + /* Find ISM device with same PNETID as connecting interface */ + smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev); + if (!(*ismdev)) + return SMC_CLC_DECL_CNFERR; /* configuration error */ + return 0; +} + +/* Check for VLAN ID and register it on ISM device just for CLC handshake */ +static int smc_connect_ism_vlan_setup(struct smc_sock *smc, + struct smcd_dev *ismdev, + unsigned short vlan_id) +{ + if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id)) + return SMC_CLC_DECL_CNFERR; + return 0; +} + +/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is + * used, the VLAN ID will be registered again during the connection setup. + */ +static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, + struct smcd_dev *ismdev, + unsigned short vlan_id) +{ + if (!is_smcd) + return 0; + if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id)) + return SMC_CLC_DECL_CNFERR; + return 0; +} + /* CLC handshake during connect */ -static int smc_connect_clc(struct smc_sock *smc, +static int smc_connect_clc(struct smc_sock *smc, int smc_type, struct smc_clc_msg_accept_confirm *aclc, - struct smc_ib_device *ibdev, u8 ibport) + struct smc_ib_device *ibdev, u8 ibport, + struct smcd_dev *ismdev) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, ibdev, ibport); + rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev); if (rc) return rc; /* receive SMC Accept CLC message */ @@ -483,8 +543,8 @@ static int smc_connect_rdma(struct smc_sock *smc, int reason_code = 0; mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl, - aclc->hdr.flag); + local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, + ibport, &aclc->lcl, NULL, 0); if (local_contact < 0) { if (local_contact == -ENOMEM) reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ @@ -499,7 +559,7 @@ static int smc_connect_rdma(struct smc_sock *smc, smc_conn_save_peer_info(smc, aclc); /* create send buffer and rmb */ - if (smc_buf_create(smc)) + if (smc_buf_create(smc, false)) return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); if (local_contact == SMC_FIRST_CONTACT) @@ -546,11 +606,50 @@ static int smc_connect_rdma(struct smc_sock *smc, return 0; } +/* setup for ISM connection of client */ +static int smc_connect_ism(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *aclc, + struct smcd_dev *ismdev) +{ + int local_contact = SMC_FIRST_CONTACT; + int rc = 0; + + mutex_lock(&smc_create_lgr_pending); + local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, + NULL, ismdev, aclc->gid); + if (local_contact < 0) + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + + /* Create send and receive buffers */ + if (smc_buf_create(smc, true)) + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); + + smc_conn_save_peer_info(smc, aclc); + smc_close_init(smc); + smc_rx_init(smc); + smc_tx_init(smc); + + rc = smc_clc_send_confirm(smc); + if (rc) + return smc_connect_abort(smc, rc, local_contact); + mutex_unlock(&smc_create_lgr_pending); + + smc_copy_sock_settings_to_clc(smc); + if (smc->sk.sk_state == SMC_INIT) + smc->sk.sk_state = SMC_ACTIVE; + + return 0; +} + /* perform steps before actually connecting */ static int __smc_connect(struct smc_sock *smc) { + bool ism_supported = false, rdma_supported = false; struct smc_clc_msg_accept_confirm aclc; struct smc_ib_device *ibdev; + struct smcd_dev *ismdev; + unsigned short vlan; + int smc_type; int rc = 0; u8 ibport; @@ -567,20 +666,52 @@ static int __smc_connect(struct smc_sock *smc) if (using_ipsec(smc)) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); - /* check if a RDMA device is available; if not, fall back */ - if (smc_check_rdma(smc, &ibdev, &ibport)) + /* check for VLAN ID */ + if (smc_vlan_by_tcpsk(smc->clcsock, &vlan)) + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); + + /* check if there is an ism device available */ + if (!smc_check_ism(smc, &ismdev) && + !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) { + /* ISM is supported for this connection */ + ism_supported = true; + smc_type = SMC_TYPE_D; + } + + /* check if there is a rdma device available */ + if (!smc_check_rdma(smc, &ibdev, &ibport)) { + /* RDMA is supported for this connection */ + rdma_supported = true; + if (ism_supported) + smc_type = SMC_TYPE_B; /* both */ + else + smc_type = SMC_TYPE_R; /* only RDMA */ + } + + /* if neither ISM nor RDMA are supported, fallback */ + if (!rdma_supported && !ism_supported) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); /* perform CLC handshake */ - rc = smc_connect_clc(smc, &aclc, ibdev, ibport); - if (rc) + rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev); + if (rc) { + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); + } - /* connect using rdma */ - rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); - if (rc) + /* depending on previous steps, connect using rdma or ism */ + if (rdma_supported && aclc.hdr.path == SMC_TYPE_R) + rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); + else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) + rc = smc_connect_ism(smc, &aclc, ismdev); + else + rc = SMC_CLC_DECL_CNFERR; + if (rc) { + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); + } + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return 0; } @@ -948,7 +1079,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, int *local_contact) { /* allocate connection / link group */ - *local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0); + *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, + &pclc->lcl, NULL, 0); if (*local_contact < 0) { if (*local_contact == -ENOMEM) return SMC_CLC_DECL_MEM;/* insufficient memory*/ @@ -956,12 +1088,50 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, } /* create send buffer and rmb */ - if (smc_buf_create(new_smc)) + if (smc_buf_create(new_smc, false)) return SMC_CLC_DECL_MEM; return 0; } +/* listen worker: initialize connection and buffers for SMC-D */ +static int smc_listen_ism_init(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smcd_dev *ismdev, + int *local_contact) +{ + struct smc_clc_msg_smcd *pclc_smcd; + + pclc_smcd = smc_get_clc_msg_smcd(pclc); + *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL, + ismdev, pclc_smcd->gid); + if (*local_contact < 0) { + if (*local_contact == -ENOMEM) + return SMC_CLC_DECL_MEM;/* insufficient memory*/ + return SMC_CLC_DECL_INTERR; /* other error */ + } + + /* Check if peer can be reached via ISM device */ + if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid, + new_smc->conn.lgr->vlan_id, + new_smc->conn.lgr->smcd)) { + if (*local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); + smc_conn_free(&new_smc->conn); + return SMC_CLC_DECL_CNFERR; + } + + /* Create send and receive buffers */ + if (smc_buf_create(new_smc, true)) { + if (*local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); + smc_conn_free(&new_smc->conn); + return SMC_CLC_DECL_MEM; + } + + return 0; +} + /* listen worker: register buffers */ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) { @@ -1020,6 +1190,8 @@ static void smc_listen_work(struct work_struct *work) struct smc_clc_msg_accept_confirm cclc; struct smc_clc_msg_proposal *pclc; struct smc_ib_device *ibdev; + bool ism_supported = false; + struct smcd_dev *ismdev; u8 buf[SMC_CLC_MAX_LEN]; int local_contact = 0; int reason_code = 0; @@ -1060,12 +1232,21 @@ static void smc_listen_work(struct work_struct *work) smc_rx_init(new_smc); smc_tx_init(new_smc); + /* check if ISM is available */ + if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) && + !smc_check_ism(new_smc, &ismdev) && + !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) { + ism_supported = true; + } + /* check if RDMA is available */ - if (smc_check_rdma(new_smc, &ibdev, &ibport) || - smc_listen_rdma_check(new_smc, pclc) || - smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, - &local_contact) || - smc_listen_rdma_reg(new_smc, local_contact)) { + if (!ism_supported && + ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || + smc_check_rdma(new_smc, &ibdev, &ibport) || + smc_listen_rdma_check(new_smc, pclc) || + smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, + &local_contact) || + smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact); @@ -1090,7 +1271,8 @@ static void smc_listen_work(struct work_struct *work) } /* finish worker */ - smc_listen_rdma_finish(new_smc, &cclc, local_contact); + if (!ism_supported) + smc_listen_rdma_finish(new_smc, &cclc, local_contact); smc_conn_save_peer_info(new_smc, &cclc); mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); diff --git a/net/smc/smc.h b/net/smc/smc.h index d7ca26570482..be20acd7b5ab 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -21,8 +21,6 @@ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ -#define SMC_MAX_PORTS 2 /* Max # of ports */ - extern struct proto smc_proto; extern struct proto smc_proto6; @@ -185,6 +183,11 @@ struct smc_connection { spinlock_t acurs_lock; /* protect cursors */ #endif struct work_struct close_work; /* peer sent some closing */ + struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */ + u8 rx_off; /* receive offset: + * 0 for SMC-R, 32 for SMC-D + */ + u64 peer_token; /* SMC-D token of peer */ }; struct smc_connect_info { diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a7e8d63fc8ae..621d8cca570b 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -117,7 +117,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, return rc; } -int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) +static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) { struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; @@ -130,6 +130,21 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) return smc_cdc_msg_send(conn, wr_buf, pend); } +int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) +{ + int rc; + + if (conn->lgr->is_smcd) { + spin_lock_bh(&conn->send_lock); + rc = smcd_cdc_msg_send(conn); + spin_unlock_bh(&conn->send_lock); + } else { + rc = smcr_cdc_get_slot_and_msg_send(conn); + } + + return rc; +} + static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, unsigned long data) { @@ -157,6 +172,45 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) (unsigned long)conn); } +/* Send a SMC-D CDC header. + * This increments the free space available in our send buffer. + * Also update the confirmed receive buffer with what was sent to the peer. + */ +int smcd_cdc_msg_send(struct smc_connection *conn) +{ + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + struct smcd_cdc_msg cdc; + int rc, diff; + + memset(&cdc, 0, sizeof(cdc)); + cdc.common.type = SMC_CDC_MSG_TYPE; + cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap; + cdc.prod_count = conn->local_tx_ctrl.prod.count; + + cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap; + cdc.cons_count = conn->local_tx_ctrl.cons.count; + cdc.prod_flags = conn->local_tx_ctrl.prod_flags; + cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags; + rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1); + if (rc) + return rc; + smc_curs_write(&conn->rx_curs_confirmed, + smc_curs_read(&conn->local_tx_ctrl.cons, conn), conn); + /* Calculate transmitted data and increment free send buffer space */ + diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, + &conn->tx_curs_sent); + /* increased by confirmed number of bytes */ + smp_mb__before_atomic(); + atomic_add(diff, &conn->sndbuf_space); + /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ + smp_mb__after_atomic(); + smc_curs_write(&conn->tx_curs_fin, + smc_curs_read(&conn->tx_curs_sent, conn), conn); + + smc_tx_sndbuf_nonfull(smc); + return rc; +} + /********************************* receive ***********************************/ static inline bool smc_cdc_before(u16 seq1, u16 seq2) @@ -178,7 +232,7 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc, if (!sock_flag(&smc->sk, SOCK_URGINLINE)) /* we'll skip the urgent byte, so don't account for it */ (*diff_prod)--; - base = (char *)conn->rmb_desc->cpu_addr; + base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off; if (conn->urg_curs.count) conn->urg_rx_byte = *(base + conn->urg_curs.count - 1); else @@ -276,6 +330,34 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc) sock_put(&smc->sk); /* no free sk in softirq-context */ } +/* Schedule a tasklet for this connection. Triggered from the ISM device IRQ + * handler to indicate update in the DMBE. + * + * Context: + * - tasklet context + */ +static void smcd_cdc_rx_tsklet(unsigned long data) +{ + struct smc_connection *conn = (struct smc_connection *)data; + struct smcd_cdc_msg cdc; + struct smc_sock *smc; + + if (!conn) + return; + + memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc)); + smc = container_of(conn, struct smc_sock, conn); + smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc); +} + +/* Initialize receive tasklet. Called from ISM device IRQ handler to start + * receiver side. + */ +void smcd_cdc_rx_init(struct smc_connection *conn) +{ + tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn); +} + /***************************** init, exit, misc ******************************/ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index f60082fee5b8..8fbce4fee3e4 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -50,6 +50,20 @@ struct smc_cdc_msg { u8 reserved[18]; } __packed; /* format defined in RFC7609 */ +/* CDC message for SMC-D */ +struct smcd_cdc_msg { + struct smc_wr_rx_hdr common; /* Type = 0xFE */ + u8 res1[7]; + u16 prod_wrap; + u32 prod_count; + u8 res2[2]; + u16 cons_wrap; + u32 cons_count; + struct smc_cdc_producer_flags prod_flags; + struct smc_cdc_conn_state_flags conn_state_flags; + u8 res3[8]; +} __packed; + static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn) { return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort || @@ -204,9 +218,9 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local, smc_curs_write(local, smc_curs_read(&temp, conn), conn); } -static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, - struct smc_cdc_msg *peer, - struct smc_connection *conn) +static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local, + struct smc_cdc_msg *peer, + struct smc_connection *conn) { local->common.type = peer->common.type; local->len = peer->len; @@ -218,6 +232,27 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, local->conn_state_flags = peer->conn_state_flags; } +static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local, + struct smcd_cdc_msg *peer) +{ + local->prod.wrap = peer->prod_wrap; + local->prod.count = peer->prod_count; + local->cons.wrap = peer->cons_wrap; + local->cons.count = peer->cons_count; + local->prod_flags = peer->prod_flags; + local->conn_state_flags = peer->conn_state_flags; +} + +static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, + struct smc_cdc_msg *peer, + struct smc_connection *conn) +{ + if (conn->lgr->is_smcd) + smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer); + else + smcr_cdc_msg_to_host(local, peer, conn); +} + struct smc_cdc_tx_pend; int smc_cdc_get_free_slot(struct smc_connection *conn, @@ -227,6 +262,8 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); +int smcd_cdc_msg_send(struct smc_connection *conn); int smc_cdc_init(void) __init; +void smcd_cdc_rx_init(struct smc_connection *conn); #endif /* SMC_CDC_H */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 717449b1da0b..038d70ef7892 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -23,9 +23,15 @@ #include "smc_core.h" #include "smc_clc.h" #include "smc_ib.h" +#include "smc_ism.h" + +#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 +#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 /* eye catcher "SMCR" EBCDIC for CLC messages */ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; +/* eye catcher "SMCD" EBCDIC for CLC messages */ +static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers @@ -38,10 +44,14 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) struct smc_clc_msg_decline *dclc; struct smc_clc_msg_trail *trl; - if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && + memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; switch (clcm->type) { case SMC_CLC_PROPOSAL: + if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && + clcm->path != SMC_TYPE_B) + return false; pclc = (struct smc_clc_msg_proposal *)clcm; pclc_prfx = smc_clc_proposal_get_prefix(pclc); if (ntohs(pclc->hdr.length) != @@ -56,10 +66,16 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) break; case SMC_CLC_ACCEPT: case SMC_CLC_CONFIRM: + if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D) + return false; clc = (struct smc_clc_msg_accept_confirm *)clcm; - if (ntohs(clc->hdr.length) != sizeof(*clc)) + if ((clcm->path == SMC_TYPE_R && + ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || + (clcm->path == SMC_TYPE_D && + ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) return false; - trl = &clc->trl; + trl = (struct smc_clc_msg_trail *) + ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; @@ -70,7 +86,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) default: return false; } - if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && + memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; return true; } @@ -295,6 +312,9 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || (datlen > buflen) || + (clcm->version != SMC_CLC_V1) || + (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && + clcm->path != SMC_TYPE_B) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -356,17 +376,18 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) } /* send CLC PROPOSAL message across internal TCP socket */ -int smc_clc_send_proposal(struct smc_sock *smc, - struct smc_ib_device *smcibdev, - u8 ibport) +int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, + struct smc_ib_device *ibdev, u8 ibport, + struct smcd_dev *ismdev) { struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; struct smc_clc_msg_proposal_prefix pclc_prfx; + struct smc_clc_msg_smcd pclc_smcd; struct smc_clc_msg_proposal pclc; struct smc_clc_msg_trail trl; int len, i, plen, rc; int reason_code = 0; - struct kvec vec[4]; + struct kvec vec[5]; struct msghdr msg; /* retrieve ip prefixes for CLC proposal msg */ @@ -381,18 +402,34 @@ int smc_clc_send_proposal(struct smc_sock *smc, memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.length = htons(plen); pclc.hdr.version = SMC_CLC_V1; /* SMC version */ - memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); - memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); - pclc.iparea_offset = htons(0); + pclc.hdr.path = smc_type; + if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) { + /* add SMC-R specifics */ + memcpy(pclc.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&pclc.lcl.gid, &ibdev->gid[ibport - 1], SMC_GID_SIZE); + memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN); + pclc.iparea_offset = htons(0); + } + if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { + /* add SMC-D specifics */ + memset(&pclc_smcd, 0, sizeof(pclc_smcd)); + plen += sizeof(pclc_smcd); + pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET); + pclc_smcd.gid = ismdev->local_gid; + } + pclc.hdr.length = htons(plen); memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); i = 0; vec[i].iov_base = &pclc; vec[i++].iov_len = sizeof(pclc); + if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { + vec[i].iov_base = &pclc_smcd; + vec[i++].iov_len = sizeof(pclc_smcd); + } vec[i].iov_base = &pclc_prfx; vec[i++].iov_len = sizeof(pclc_prfx); if (pclc_prfx.ipv6_prefixes_cnt > 0) { @@ -428,35 +465,56 @@ int smc_clc_send_confirm(struct smc_sock *smc) struct kvec vec; int len; - link = &conn->lgr->lnk[SMC_SINGLE_LINK]; /* send SMC Confirm CLC msg */ memset(&cclc, 0, sizeof(cclc)); - memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); cclc.hdr.type = SMC_CLC_CONFIRM; - cclc.hdr.length = htons(sizeof(cclc)); cclc.hdr.version = SMC_CLC_V1; /* SMC version */ - memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); - memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); - hton24(cclc.qpn, link->roce_qp->qp_num); - cclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); - cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ - cclc.rmbe_alert_token = htonl(conn->alert_token_local); - cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); - cclc.rmbe_size = conn->rmbe_size_short; - cclc.rmb_dma_addr = cpu_to_be64( - (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); - hton24(cclc.psn, link->psn_initial); - - memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + if (smc->conn.lgr->is_smcd) { + /* SMC-D specific settings */ + memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + cclc.hdr.path = SMC_TYPE_D; + cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + cclc.gid = conn->lgr->smcd->local_gid; + cclc.token = conn->rmb_desc->token; + cclc.dmbe_size = conn->rmbe_size_short; + cclc.dmbe_idx = 0; + memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); + memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + } else { + /* SMC-R specific settings */ + link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + cclc.hdr.path = SMC_TYPE_R; + cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + memcpy(cclc.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], + SMC_GID_SIZE); + memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], + ETH_ALEN); + hton24(cclc.qpn, link->roce_qp->qp_num); + cclc.rmb_rkey = + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ + cclc.rmbe_alert_token = htonl(conn->alert_token_local); + cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); + cclc.rmbe_size = conn->rmbe_size_short; + cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address + (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); + hton24(cclc.psn, link->psn_initial); + memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + } memset(&msg, 0, sizeof(msg)); vec.iov_base = &cclc; - vec.iov_len = sizeof(cclc); - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); - if (len < sizeof(cclc)) { + vec.iov_len = ntohs(cclc.hdr.length); + len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, + ntohs(cclc.hdr.length)); + if (len < ntohs(cclc.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; @@ -479,35 +537,58 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) int rc = 0; int len; - link = &conn->lgr->lnk[SMC_SINGLE_LINK]; memset(&aclc, 0, sizeof(aclc)); - memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); aclc.hdr.type = SMC_CLC_ACCEPT; - aclc.hdr.length = htons(sizeof(aclc)); aclc.hdr.version = SMC_CLC_V1; /* SMC version */ if (srv_first_contact) aclc.hdr.flag = 1; - memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); - memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); - hton24(aclc.qpn, link->roce_qp->qp_num); - aclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); - aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */ - aclc.rmbe_alert_token = htonl(conn->alert_token_local); - aclc.qp_mtu = link->path_mtu; - aclc.rmbe_size = conn->rmbe_size_short, - aclc.rmb_dma_addr = cpu_to_be64( - (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); - hton24(aclc.psn, link->psn_initial); - memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + + if (new_smc->conn.lgr->is_smcd) { + /* SMC-D specific settings */ + aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + aclc.hdr.path = SMC_TYPE_D; + aclc.gid = conn->lgr->smcd->local_gid; + aclc.token = conn->rmb_desc->token; + aclc.dmbe_size = conn->rmbe_size_short; + aclc.dmbe_idx = 0; + memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); + memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + } else { + /* SMC-R specific settings */ + aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + aclc.hdr.path = SMC_TYPE_R; + link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + memcpy(aclc.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], + SMC_GID_SIZE); + memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], + ETH_ALEN); + hton24(aclc.qpn, link->roce_qp->qp_num); + aclc.rmb_rkey = + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */ + aclc.rmbe_alert_token = htonl(conn->alert_token_local); + aclc.qp_mtu = link->path_mtu; + aclc.rmbe_size = conn->rmbe_size_short, + aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address + (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); + hton24(aclc.psn, link->psn_initial); + memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + } memset(&msg, 0, sizeof(msg)); vec.iov_base = &aclc; - vec.iov_len = sizeof(aclc); - len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); - if (len < sizeof(aclc)) { + vec.iov_len = ntohs(aclc.hdr.length); + len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, + ntohs(aclc.hdr.length)); + if (len < ntohs(aclc.hdr.length)) { if (len >= 0) new_smc->sk.sk_err = EPROTO; else diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 41ff9ea96139..100e988ad1a8 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -23,6 +23,9 @@ #define SMC_CLC_DECLINE 0x04 #define SMC_CLC_V1 0x1 /* SMC version */ +#define SMC_TYPE_R 0 /* SMC-R only */ +#define SMC_TYPE_D 1 /* SMC-D only */ +#define SMC_TYPE_B 3 /* SMC-R and SMC-D */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ #define SMC_CLC_DECL_TIMEOUT 0x02000000 /* timeout */ @@ -42,9 +45,11 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */ #if defined(__BIG_ENDIAN_BITFIELD) u8 version : 4, flag : 1, - rsvd : 3; + rsvd : 1, + path : 2; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 rsvd : 3, + u8 path : 2, + rsvd : 1, flag : 1, version : 4; #endif @@ -77,6 +82,11 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ } __aligned(4); +struct smc_clc_msg_smcd { /* SMC-D GID information */ + u64 gid; /* ISM GID of requestor */ + u8 res[32]; +}; + struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ struct smc_clc_msg_hdr hdr; struct smc_clc_msg_local lcl; @@ -94,23 +104,45 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; - struct smc_clc_msg_local lcl; - u8 qpn[3]; /* QP number */ - __be32 rmb_rkey; /* RMB rkey */ - u8 rmbe_idx; /* Index of RMBE in RMB */ - __be32 rmbe_alert_token;/* unique connection id */ + union { + struct { /* SMC-R */ + struct smc_clc_msg_local lcl; + u8 qpn[3]; /* QP number */ + __be32 rmb_rkey; /* RMB rkey */ + u8 rmbe_idx; /* Index of RMBE in RMB */ + __be32 rmbe_alert_token;/* unique connection id */ #if defined(__BIG_ENDIAN_BITFIELD) - u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */ - qp_mtu : 4; /* QP mtu */ + u8 rmbe_size : 4, /* buf size (compressed) */ + qp_mtu : 4; /* QP mtu */ #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 qp_mtu : 4, - rmbe_size : 4; + u8 qp_mtu : 4, + rmbe_size : 4; #endif - u8 reserved; - __be64 rmb_dma_addr; /* RMB virtual address */ - u8 reserved2; - u8 psn[3]; /* initial packet sequence number */ - struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ + u8 reserved; + __be64 rmb_dma_addr; /* RMB virtual address */ + u8 reserved2; + u8 psn[3]; /* packet sequence number */ + struct smc_clc_msg_trail smcr_trl; + /* eye catcher "SMCR" EBCDIC */ + } __packed; + struct { /* SMC-D */ + u64 gid; /* Sender GID */ + u64 token; /* DMB token */ + u8 dmbe_idx; /* DMBE index */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 dmbe_size : 4, /* buf size (compressed) */ + reserved3 : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved3 : 4, + dmbe_size : 4; +#endif + u16 reserved4; + u32 linkid; /* Link identifier */ + u32 reserved5[3]; + struct smc_clc_msg_trail smcd_trl; + /* eye catcher "SMCD" EBCDIC */ + } __packed; + }; } __packed; /* format defined in RFC7609 */ struct smc_clc_msg_decline { /* clc decline message */ @@ -129,13 +161,26 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } +/* get SMC-D info from proposal message */ +static inline struct smc_clc_msg_smcd * +smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) +{ + if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd)) + return NULL; + + return (struct smc_clc_msg_smcd *)(prop + 1); +} + +struct smcd_dev; + int smc_clc_prfx_match(struct socket *clcsock, struct smc_clc_msg_proposal_prefix *prop); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); -int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, - u8 ibport); +int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, + struct smc_ib_device *smcibdev, u8 ibport, + struct smcd_dev *ismdev); int smc_clc_send_confirm(struct smc_sock *smc); int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index add82b0266f3..66741e61a3b0 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -25,6 +25,7 @@ #include "smc_llc.h" #include "smc_cdc.h" #include "smc_close.h" +#include "smc_ism.h" #define SMC_LGR_NUM_INCR 256 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) @@ -46,8 +47,8 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) * otherwise there is a risk of out-of-sync link groups. */ mod_delayed_work(system_wq, &lgr->free_work, - lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : - SMC_LGR_FREE_DELAY_SERV); + (!lgr->is_smcd && lgr->role == SMC_CLNT) ? + SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV); } /* Register connection's alert token in our lookup structure. @@ -153,16 +154,18 @@ static void smc_lgr_free_work(struct work_struct *work) free: spin_unlock_bh(&smc_lgr_list.lock); if (!delayed_work_pending(&lgr->free_work)) { - if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) + if (!lgr->is_smcd && + lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); smc_lgr_free(lgr); } } /* create a new SMC link group */ -static int smc_lgr_create(struct smc_sock *smc, +static int smc_lgr_create(struct smc_sock *smc, bool is_smcd, struct smc_ib_device *smcibdev, u8 ibport, - char *peer_systemid, unsigned short vlan_id) + char *peer_systemid, unsigned short vlan_id, + struct smcd_dev *smcismdev, u64 peer_gid) { struct smc_link_group *lgr; struct smc_link *lnk; @@ -170,17 +173,23 @@ static int smc_lgr_create(struct smc_sock *smc, int rc = 0; int i; + if (is_smcd && vlan_id) { + rc = smc_ism_get_vlan(smcismdev, vlan_id); + if (rc) + goto out; + } + lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); if (!lgr) { rc = -ENOMEM; goto out; } - lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; + lgr->is_smcd = is_smcd; lgr->sync_err = 0; - memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); lgr->vlan_id = vlan_id; rwlock_init(&lgr->sndbufs_lock); rwlock_init(&lgr->rmbs_lock); + rwlock_init(&lgr->conns_lock); for (i = 0; i < SMC_RMBE_SIZES; i++) { INIT_LIST_HEAD(&lgr->sndbufs[i]); INIT_LIST_HEAD(&lgr->rmbs[i]); @@ -189,36 +198,44 @@ static int smc_lgr_create(struct smc_sock *smc, memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); lgr->conns_all = RB_ROOT; - - lnk = &lgr->lnk[SMC_SINGLE_LINK]; - /* initialize link */ - lnk->state = SMC_LNK_ACTIVATING; - lnk->link_id = SMC_SINGLE_LINK; - lnk->smcibdev = smcibdev; - lnk->ibport = ibport; - lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; - if (!smcibdev->initialized) - smc_ib_setup_per_ibdev(smcibdev); - get_random_bytes(rndvec, sizeof(rndvec)); - lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); - rc = smc_llc_link_init(lnk); - if (rc) - goto free_lgr; - rc = smc_wr_alloc_link_mem(lnk); - if (rc) - goto clear_llc_lnk; - rc = smc_ib_create_protection_domain(lnk); - if (rc) - goto free_link_mem; - rc = smc_ib_create_queue_pair(lnk); - if (rc) - goto dealloc_pd; - rc = smc_wr_create_link(lnk); - if (rc) - goto destroy_qp; - + if (is_smcd) { + /* SMC-D specific settings */ + lgr->peer_gid = peer_gid; + lgr->smcd = smcismdev; + } else { + /* SMC-R specific settings */ + lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; + memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); + + lnk = &lgr->lnk[SMC_SINGLE_LINK]; + /* initialize link */ + lnk->state = SMC_LNK_ACTIVATING; + lnk->link_id = SMC_SINGLE_LINK; + lnk->smcibdev = smcibdev; + lnk->ibport = ibport; + lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; + if (!smcibdev->initialized) + smc_ib_setup_per_ibdev(smcibdev); + get_random_bytes(rndvec, sizeof(rndvec)); + lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + + (rndvec[2] << 16); + rc = smc_llc_link_init(lnk); + if (rc) + goto free_lgr; + rc = smc_wr_alloc_link_mem(lnk); + if (rc) + goto clear_llc_lnk; + rc = smc_ib_create_protection_domain(lnk); + if (rc) + goto free_link_mem; + rc = smc_ib_create_queue_pair(lnk); + if (rc) + goto dealloc_pd; + rc = smc_wr_create_link(lnk); + if (rc) + goto destroy_qp; + } smc->conn.lgr = lgr; - rwlock_init(&lgr->conns_lock); spin_lock_bh(&smc_lgr_list.lock); list_add(&lgr->list, &smc_lgr_list.list); spin_unlock_bh(&smc_lgr_list.lock); @@ -264,7 +281,12 @@ void smc_conn_free(struct smc_connection *conn) { if (!conn->lgr) return; - smc_cdc_tx_dismiss_slots(conn); + if (conn->lgr->is_smcd) { + smc_ism_unset_conn(conn); + tasklet_kill(&conn->rx_tsklet); + } else { + smc_cdc_tx_dismiss_slots(conn); + } smc_lgr_unregister_conn(conn); smc_buf_unuse(conn); } @@ -280,8 +302,8 @@ static void smc_link_clear(struct smc_link *lnk) smc_wr_free_link_mem(lnk); } -static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, - struct smc_buf_desc *buf_desc) +static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, + struct smc_buf_desc *buf_desc) { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; @@ -301,6 +323,28 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, kfree(buf_desc); } +static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, + struct smc_buf_desc *buf_desc) +{ + if (is_dmb) { + /* restore original buf len */ + buf_desc->len += sizeof(struct smcd_cdc_msg); + smc_ism_unregister_dmb(lgr->smcd, buf_desc); + } else { + kfree(buf_desc->cpu_addr); + } + kfree(buf_desc); +} + +static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, + struct smc_buf_desc *buf_desc) +{ + if (lgr->is_smcd) + smcd_buf_free(lgr, is_rmb, buf_desc); + else + smcr_buf_free(lgr, is_rmb, buf_desc); +} + static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) { struct smc_buf_desc *buf_desc, *bf_desc; @@ -332,7 +376,10 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) void smc_lgr_free(struct smc_link_group *lgr) { smc_lgr_free_bufs(lgr); - smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); + if (lgr->is_smcd) + smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); + else + smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); kfree(lgr); } @@ -357,7 +404,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) lgr->terminating = 1; if (!list_empty(&lgr->list)) /* forget lgr */ list_del_init(&lgr->list); - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + if (!lgr->is_smcd) + smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); write_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); @@ -374,7 +422,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) node = rb_first(&lgr->conns_all); } write_unlock_bh(&lgr->conns_lock); - wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); + if (!lgr->is_smcd) + wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); smc_lgr_schedule_free_work(lgr); } @@ -392,17 +441,44 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { - if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && + if (!lgr->is_smcd && + lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) __smc_lgr_terminate(lgr); } spin_unlock_bh(&smc_lgr_list.lock); } +/* Called when SMC-D device is terminated or peer is lost */ +void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid) +{ + struct smc_link_group *lgr, *l; + LIST_HEAD(lgr_free_list); + + /* run common cleanup function and build free list */ + spin_lock_bh(&smc_lgr_list.lock); + list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { + if (lgr->is_smcd && lgr->smcd == dev && + (!peer_gid || lgr->peer_gid == peer_gid) && + !list_empty(&lgr->list)) { + __smc_lgr_terminate(lgr); + list_move(&lgr->list, &lgr_free_list); + } + } + spin_unlock_bh(&smc_lgr_list.lock); + + /* cancel the regular free workers and actually free lgrs */ + list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { + list_del_init(&lgr->list); + cancel_delayed_work_sync(&lgr->free_work); + smc_lgr_free(lgr); + } +} + /* Determine vlan of internal TCP socket. * @vlan_id: address to store the determined vlan id into */ -static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) +int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) { struct dst_entry *dst = sk_dst_get(clcsock->sk); struct net_device *ndev; @@ -477,10 +553,30 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) return -ENODEV; } +static bool smcr_lgr_match(struct smc_link_group *lgr, + struct smc_clc_msg_local *lcl, + enum smc_lgr_role role) +{ + return !memcmp(lgr->peer_systemid, lcl->id_for_peer, + SMC_SYSTEMID_LEN) && + !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, + SMC_GID_SIZE) && + !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, + sizeof(lcl->mac)) && + lgr->role == role; +} + +static bool smcd_lgr_match(struct smc_link_group *lgr, + struct smcd_dev *smcismdev, u64 peer_gid) +{ + return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; +} + /* create a new SMC connection (and a new link group if necessary) */ -int smc_conn_create(struct smc_sock *smc, +int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, struct smc_ib_device *smcibdev, u8 ibport, - struct smc_clc_msg_local *lcl, int srv_first_contact) + struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, + u64 peer_gid) { struct smc_connection *conn = &smc->conn; int local_contact = SMC_FIRST_CONTACT; @@ -502,17 +598,12 @@ int smc_conn_create(struct smc_sock *smc, spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry(lgr, &smc_lgr_list.list, list) { write_lock_bh(&lgr->conns_lock); - if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, - SMC_SYSTEMID_LEN) && - !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, - SMC_GID_SIZE) && - !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, - sizeof(lcl->mac)) && + if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) : + smcr_lgr_match(lgr, lcl, role)) && !lgr->sync_err && - (lgr->role == role) && - (lgr->vlan_id == vlan_id) && - ((role == SMC_CLNT) || - (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) { + lgr->vlan_id == vlan_id && + (role == SMC_CLNT || + lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; @@ -535,16 +626,21 @@ int smc_conn_create(struct smc_sock *smc, create: if (local_contact == SMC_FIRST_CONTACT) { - rc = smc_lgr_create(smc, smcibdev, ibport, - lcl->id_for_peer, vlan_id); + rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport, + lcl->id_for_peer, vlan_id, smcd, peer_gid); if (rc) goto out; smc_lgr_register_conn(conn); /* add smc conn to lgr */ - rc = smc_link_determine_gid(conn->lgr); + if (!is_smcd) + rc = smc_link_determine_gid(conn->lgr); } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + if (is_smcd) { + conn->rx_off = sizeof(struct smcd_cdc_msg); + smcd_cdc_rx_init(conn); /* init tasklet for this conn */ + } #ifndef KERNEL_HAS_ATOMIC64 spin_lock_init(&conn->acurs_lock); #endif @@ -609,8 +705,8 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size) return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } -static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, - bool is_rmb, int bufsize) +static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, + bool is_rmb, int bufsize) { struct smc_buf_desc *buf_desc; struct smc_link *lnk; @@ -668,7 +764,44 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, return buf_desc; } -static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) +#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ + +static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, + bool is_dmb, int bufsize) +{ + struct smc_buf_desc *buf_desc; + int rc; + + if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) + return ERR_PTR(-EAGAIN); + + /* try to alloc a new DMB */ + buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); + if (!buf_desc) + return ERR_PTR(-ENOMEM); + if (is_dmb) { + rc = smc_ism_register_dmb(lgr, bufsize, buf_desc); + if (rc) { + kfree(buf_desc); + return ERR_PTR(-EAGAIN); + } + buf_desc->pages = virt_to_page(buf_desc->cpu_addr); + /* CDC header stored in buf. So, pretend it was smaller */ + buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg); + } else { + buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL | + __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC); + if (!buf_desc->cpu_addr) { + kfree(buf_desc); + return ERR_PTR(-EAGAIN); + } + buf_desc->len = bufsize; + } + return buf_desc; +} + +static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) { struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); struct smc_connection *conn = &smc->conn; @@ -706,7 +839,11 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) break; /* found reusable slot */ } - buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); + if (is_smcd) + buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize); + else + buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); + if (PTR_ERR(buf_desc) == -ENOMEM) break; if (IS_ERR(buf_desc)) @@ -727,7 +864,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) conn->rmbe_size_short = bufsize_short; smc->sk.sk_rcvbuf = bufsize * 2; atomic_set(&conn->bytes_to_rcv, 0); - conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); + conn->rmbe_update_limit = + smc_rmb_wnd_update_limit(buf_desc->len); + if (is_smcd) + smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ } else { conn->sndbuf_desc = buf_desc; smc->sk.sk_sndbuf = bufsize * 2; @@ -740,6 +880,8 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!conn->lgr || conn->lgr->is_smcd) + return; smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -748,6 +890,8 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!conn->lgr || conn->lgr->is_smcd) + return; smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -756,6 +900,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!conn->lgr || conn->lgr->is_smcd) + return; smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, conn->rmb_desc, DMA_FROM_DEVICE); } @@ -764,6 +910,8 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!conn->lgr || conn->lgr->is_smcd) + return; smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, conn->rmb_desc, DMA_FROM_DEVICE); } @@ -774,16 +922,16 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) * the Linux implementation uses just one RMB-element per RMB, i.e. uses an * extra RMB for every connection in a link group */ -int smc_buf_create(struct smc_sock *smc) +int smc_buf_create(struct smc_sock *smc, bool is_smcd) { int rc; /* create send buffer */ - rc = __smc_buf_create(smc, false); + rc = __smc_buf_create(smc, is_smcd, false); if (rc) return rc; /* create rmb */ - rc = __smc_buf_create(smc, true); + rc = __smc_buf_create(smc, is_smcd, true); if (rc) smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); return rc; @@ -865,7 +1013,8 @@ void smc_core_exit(void) spin_unlock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + if (!lgr->is_smcd) + smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); cancel_delayed_work_sync(&lgr->free_work); smc_lgr_free(lgr); /* free link group */ } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 93cb3523bf50..8b47e0168fc3 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -124,15 +124,28 @@ struct smc_buf_desc { void *cpu_addr; /* virtual address of buffer */ struct page *pages; int len; /* length of buffer */ - struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ - struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; - /* for rmb only: memory region - * incl. rkey provided to peer - */ - u32 order; /* allocation order */ u32 used; /* currently used / unused */ u8 reused : 1; /* new created / reused */ u8 regerr : 1; /* err during registration */ + union { + struct { /* SMC-R */ + struct sg_table sgt[SMC_LINKS_PER_LGR_MAX]; + /* virtual buffer */ + struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + /* for rmb only: memory region + * incl. rkey provided to peer + */ + u32 order; /* allocation order */ + }; + struct { /* SMC-D */ + unsigned short sba_idx; + /* SBA index number */ + u64 token; + /* DMB token number */ + dma_addr_t dma_addr; + /* DMA address */ + }; + }; }; struct smc_rtoken { /* address/key of remote RMB */ @@ -148,12 +161,10 @@ struct smc_rtoken { /* address/key of remote RMB */ * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15) */ +struct smcd_dev; + struct smc_link_group { struct list_head list; - enum smc_lgr_role role; /* client or server */ - struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */ - char peer_systemid[SMC_SYSTEMID_LEN]; - /* unique system_id of peer */ struct rb_root conns_all; /* connection tree */ rwlock_t conns_lock; /* protects conns_all */ unsigned int conns_num; /* current # of connections */ @@ -163,17 +174,35 @@ struct smc_link_group { rwlock_t sndbufs_lock; /* protects tx buffers */ struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */ rwlock_t rmbs_lock; /* protects rx buffers */ - struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] - [SMC_LINKS_PER_LGR_MAX]; - /* remote addr/key pairs */ - unsigned long rtokens_used_mask[BITS_TO_LONGS( - SMC_RMBS_PER_LGR_MAX)]; - /* used rtoken elements */ u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ struct delayed_work free_work; /* delayed freeing of an lgr */ u8 sync_err : 1; /* lgr no longer fits to peer */ u8 terminating : 1;/* lgr is terminating */ + + bool is_smcd; /* SMC-R or SMC-D */ + union { + struct { /* SMC-R */ + enum smc_lgr_role role; + /* client or server */ + struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; + /* smc link */ + char peer_systemid[SMC_SYSTEMID_LEN]; + /* unique system_id of peer */ + struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] + [SMC_LINKS_PER_LGR_MAX]; + /* remote addr/key pairs */ + unsigned long rtokens_used_mask[BITS_TO_LONGS + (SMC_RMBS_PER_LGR_MAX)]; + /* used rtoken elements */ + }; + struct { /* SMC-D */ + u64 peer_gid; + /* Peer GID (remote) */ + struct smcd_dev *smcd; + /* ISM device for VLAN reg. */ + }; + }; }; /* Find the connection associated with the given alert token in the link group. @@ -217,7 +246,8 @@ void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); -int smc_buf_create(struct smc_sock *smc); +void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid); +int smc_buf_create(struct smc_sock *smc, bool is_smcd); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); @@ -227,9 +257,13 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); void smc_rmb_sync_sg_for_device(struct smc_connection *conn); +int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id); + void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, +int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, struct smc_ib_device *smcibdev, u8 ibport, - struct smc_clc_msg_local *lcl, int srv_first_contact); + struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, + u64 peer_gid); +void smcd_conn_free(struct smc_connection *conn); void smc_core_exit(void); #endif diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 839354402215..6d83eef1b743 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -136,7 +136,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr && + if (smc->conn.lgr && !smc->conn.lgr->is_smcd && + (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, @@ -155,6 +156,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; } + if (smc->conn.lgr && smc->conn.lgr->is_smcd && + (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && + !list_empty(&smc->conn.lgr->list)) { + struct smc_connection *conn = &smc->conn; + struct smcd_diag_dmbinfo dinfo = { + .linkid = *((u32 *)conn->lgr->id), + .peer_gid = conn->lgr->peer_gid, + .my_gid = conn->lgr->smcd->local_gid, + .token = conn->rmb_desc->token, + .peer_token = conn->peer_token + }; + + if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0) + goto errout; + } nlmsg_end(skb, nlh); return 0; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0eed7ab9f28b..36de2fd76170 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -143,6 +143,62 @@ out: return rc; } +static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct ib_gid_attr gattr; + int rc; + + rc = ib_query_gid(smcibdev->ibdev, ibport, 0, + &smcibdev->gid[ibport - 1], &gattr); + if (rc || !gattr.ndev) + return -ENODEV; + + memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); + dev_put(gattr.ndev); + return 0; +} + +/* Create an identifier unique for this instance of SMC-R. + * The MAC-address of the first active registered IB device + * plus a random 2-byte number is used to create this identifier. + * This name is delivered to the peer during connection initialization. + */ +static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, + u8 ibport) +{ + memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], + sizeof(smcibdev->mac[ibport - 1])); + get_random_bytes(&local_systemid[0], 2); +} + +bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) +{ + return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; +} + +static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) +{ + int rc; + + memset(&smcibdev->pattr[ibport - 1], 0, + sizeof(smcibdev->pattr[ibport - 1])); + rc = ib_query_port(smcibdev->ibdev, ibport, + &smcibdev->pattr[ibport - 1]); + if (rc) + goto out; + /* the SMC protocol requires specification of the RoCE MAC address */ + rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); + if (rc) + goto out; + if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, + sizeof(local_systemid)) && + smc_ib_port_active(smcibdev, ibport)) + /* create unique system identifier */ + smc_ib_define_local_systemid(smcibdev, ibport); +out: + return rc; +} + /* process context wrapper for might_sleep smc_ib_remember_port_attr */ static void smc_ib_port_event_work(struct work_struct *work) { @@ -370,62 +426,6 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0; } -static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) -{ - struct ib_gid_attr gattr; - int rc; - - rc = ib_query_gid(smcibdev->ibdev, ibport, 0, - &smcibdev->gid[ibport - 1], &gattr); - if (rc || !gattr.ndev) - return -ENODEV; - - memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); - dev_put(gattr.ndev); - return 0; -} - -/* Create an identifier unique for this instance of SMC-R. - * The MAC-address of the first active registered IB device - * plus a random 2-byte number is used to create this identifier. - * This name is delivered to the peer during connection initialization. - */ -static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, - u8 ibport) -{ - memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], - sizeof(smcibdev->mac[ibport - 1])); - get_random_bytes(&local_systemid[0], 2); -} - -bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) -{ - return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; -} - -int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) -{ - int rc; - - memset(&smcibdev->pattr[ibport - 1], 0, - sizeof(smcibdev->pattr[ibport - 1])); - rc = ib_query_port(smcibdev->ibdev, ibport, - &smcibdev->pattr[ibport - 1]); - if (rc) - goto out; - /* the SMC protocol requires specification of the RoCE MAC address */ - rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); - if (rc) - goto out; - if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, - sizeof(local_systemid)) && - smc_ib_port_active(smcibdev, ibport)) - /* create unique system identifier */ - smc_ib_define_local_systemid(smcibdev, ibport); -out: - return rc; -} - long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) { struct ib_cq_init_attr cqattr = { @@ -454,9 +454,6 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) smcibdev->roce_cq_recv = NULL; goto err; } - INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, - smc_ib_global_event_handler); - ib_register_event_handler(&smcibdev->event_handler); smc_wr_add_dev(smcibdev); smcibdev->initialized = 1; return rc; @@ -472,7 +469,6 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) return; smcibdev->initialized = 0; smc_wr_remove_dev(smcibdev); - ib_unregister_event_handler(&smcibdev->event_handler); ib_destroy_cq(smcibdev->roce_cq_recv); ib_destroy_cq(smcibdev->roce_cq_send); } @@ -483,6 +479,8 @@ static struct ib_client smc_ib_client; static void smc_ib_add_dev(struct ib_device *ibdev) { struct smc_ib_device *smcibdev; + u8 port_cnt; + int i; if (ibdev->node_type != RDMA_NODE_IB_CA) return; @@ -498,6 +496,21 @@ static void smc_ib_add_dev(struct ib_device *ibdev) list_add_tail(&smcibdev->list, &smc_ib_devices.list); spin_unlock(&smc_ib_devices.lock); ib_set_client_data(ibdev, &smc_ib_client, smcibdev); + INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, + smc_ib_global_event_handler); + ib_register_event_handler(&smcibdev->event_handler); + + /* trigger reading of the port attributes */ + port_cnt = smcibdev->ibdev->phys_port_cnt; + for (i = 0; + i < min_t(size_t, port_cnt, SMC_MAX_PORTS); + i++) { + set_bit(i, &smcibdev->port_event_mask); + /* determine pnetids of the port */ + smc_pnetid_by_dev_port(ibdev->dev.parent, i, + smcibdev->pnetid[i]); + } + schedule_work(&smcibdev->port_event_work); } /* callback function for ib_register_client() */ @@ -512,6 +525,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) spin_unlock(&smc_ib_devices.lock); smc_pnet_remove_by_ibdev(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); + ib_unregister_event_handler(&smcibdev->event_handler); kfree(smcibdev); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index e90630dadf8e..7c1223c91229 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -15,6 +15,7 @@ #include <linux/interrupt.h> #include <linux/if_ether.h> #include <rdma/ib_verbs.h> +#include <net/smc.h> #define SMC_MAX_PORTS 2 /* Max # of ports */ #define SMC_GID_SIZE sizeof(union ib_gid) @@ -40,6 +41,8 @@ struct smc_ib_device { /* ib-device infos for smc */ char mac[SMC_MAX_PORTS][ETH_ALEN]; /* mac address per port*/ union ib_gid gid[SMC_MAX_PORTS]; /* gid per port */ + u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; + /* pnetid per port */ u8 initialized : 1; /* ib dev CQ, evthdl done */ struct work_struct port_event_work; unsigned long port_event_mask; @@ -51,7 +54,6 @@ struct smc_link; int smc_ib_register_client(void) __init; void smc_ib_unregister_client(void); bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); -int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport); int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c new file mode 100644 index 000000000000..cfade7fdcc6d --- /dev/null +++ b/net/smc/smc_ism.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Shared Memory Communications Direct over ISM devices (SMC-D) + * + * Functions for ISM device. + * + * Copyright IBM Corp. 2018 + */ + +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <asm/page.h> + +#include "smc.h" +#include "smc_core.h" +#include "smc_ism.h" +#include "smc_pnet.h" + +struct smcd_dev_list smcd_dev_list = { + .list = LIST_HEAD_INIT(smcd_dev_list.list), + .lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock) +}; + +/* Test if an ISM communication is possible. */ +int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) +{ + return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, + vlan_id); +} + +int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, + void *data, size_t len) +{ + int rc; + + rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal, + pos->offset, data, len); + + return rc < 0 ? rc : 0; +} + +/* Set a connection using this DMBE. */ +void smc_ism_set_conn(struct smc_connection *conn) +{ + unsigned long flags; + + spin_lock_irqsave(&conn->lgr->smcd->lock, flags); + conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn; + spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags); +} + +/* Unset a connection using this DMBE. */ +void smc_ism_unset_conn(struct smc_connection *conn) +{ + unsigned long flags; + + if (!conn->rmb_desc) + return; + + spin_lock_irqsave(&conn->lgr->smcd->lock, flags); + conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL; + spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags); +} + +/* Register a VLAN identifier with the ISM device. Use a reference count + * and add a VLAN identifier only when the first DMB using this VLAN is + * registered. + */ +int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid) +{ + struct smc_ism_vlanid *new_vlan, *vlan; + unsigned long flags; + int rc = 0; + + if (!vlanid) /* No valid vlan id */ + return -EINVAL; + + /* create new vlan entry, in case we need it */ + new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL); + if (!new_vlan) + return -ENOMEM; + new_vlan->vlanid = vlanid; + refcount_set(&new_vlan->refcnt, 1); + + /* if there is an existing entry, increase count and return */ + spin_lock_irqsave(&smcd->lock, flags); + list_for_each_entry(vlan, &smcd->vlan, list) { + if (vlan->vlanid == vlanid) { + refcount_inc(&vlan->refcnt); + kfree(new_vlan); + goto out; + } + } + + /* no existing entry found. + * add new entry to device; might fail, e.g., if HW limit reached + */ + if (smcd->ops->add_vlan_id(smcd, vlanid)) { + kfree(new_vlan); + rc = -EIO; + goto out; + } + list_add_tail(&new_vlan->list, &smcd->vlan); +out: + spin_unlock_irqrestore(&smcd->lock, flags); + return rc; +} + +/* Unregister a VLAN identifier with the ISM device. Use a reference count + * and remove a VLAN identifier only when the last DMB using this VLAN is + * unregistered. + */ +int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid) +{ + struct smc_ism_vlanid *vlan; + unsigned long flags; + bool found = false; + int rc = 0; + + if (!vlanid) /* No valid vlan id */ + return -EINVAL; + + spin_lock_irqsave(&smcd->lock, flags); + list_for_each_entry(vlan, &smcd->vlan, list) { + if (vlan->vlanid == vlanid) { + if (!refcount_dec_and_test(&vlan->refcnt)) + goto out; + found = true; + break; + } + } + if (!found) { + rc = -ENOENT; + goto out; /* VLAN id not in table */ + } + + /* Found and the last reference just gone */ + if (smcd->ops->del_vlan_id(smcd, vlanid)) + rc = -EIO; + list_del(&vlan->list); + kfree(vlan); +out: + spin_unlock_irqrestore(&smcd->lock, flags); + return rc; +} + +int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc) +{ + struct smcd_dmb dmb; + + memset(&dmb, 0, sizeof(dmb)); + dmb.dmb_tok = dmb_desc->token; + dmb.sba_idx = dmb_desc->sba_idx; + dmb.cpu_addr = dmb_desc->cpu_addr; + dmb.dma_addr = dmb_desc->dma_addr; + dmb.dmb_len = dmb_desc->len; + return smcd->ops->unregister_dmb(smcd, &dmb); +} + +int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, + struct smc_buf_desc *dmb_desc) +{ + struct smcd_dmb dmb; + int rc; + + memset(&dmb, 0, sizeof(dmb)); + dmb.dmb_len = dmb_len; + dmb.sba_idx = dmb_desc->sba_idx; + dmb.vlan_id = lgr->vlan_id; + dmb.rgid = lgr->peer_gid; + rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb); + if (!rc) { + dmb_desc->sba_idx = dmb.sba_idx; + dmb_desc->token = dmb.dmb_tok; + dmb_desc->cpu_addr = dmb.cpu_addr; + dmb_desc->dma_addr = dmb.dma_addr; + dmb_desc->len = dmb.dmb_len; + } + return rc; +} + +struct smc_ism_event_work { + struct work_struct work; + struct smcd_dev *smcd; + struct smcd_event event; +}; + +/* worker for SMC-D events */ +static void smc_ism_event_work(struct work_struct *work) +{ + struct smc_ism_event_work *wrk = + container_of(work, struct smc_ism_event_work, work); + + switch (wrk->event.type) { + case ISM_EVENT_GID: /* GID event, token is peer GID */ + smc_smcd_terminate(wrk->smcd, wrk->event.tok); + break; + case ISM_EVENT_DMB: + break; + } + kfree(wrk); +} + +static void smcd_release(struct device *dev) +{ + struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev); + + kfree(smcd->conn); + kfree(smcd); +} + +struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, + const struct smcd_ops *ops, int max_dmbs) +{ + struct smcd_dev *smcd; + + smcd = kzalloc(sizeof(*smcd), GFP_KERNEL); + if (!smcd) + return NULL; + smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *), + GFP_KERNEL); + if (!smcd->conn) { + kfree(smcd); + return NULL; + } + + smcd->dev.parent = parent; + smcd->dev.release = smcd_release; + device_initialize(&smcd->dev); + dev_set_name(&smcd->dev, name); + smcd->ops = ops; + smc_pnetid_by_dev_port(parent, 0, smcd->pnetid); + + spin_lock_init(&smcd->lock); + INIT_LIST_HEAD(&smcd->vlan); + smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", + WQ_MEM_RECLAIM, name); + return smcd; +} +EXPORT_SYMBOL_GPL(smcd_alloc_dev); + +int smcd_register_dev(struct smcd_dev *smcd) +{ + spin_lock(&smcd_dev_list.lock); + list_add_tail(&smcd->list, &smcd_dev_list.list); + spin_unlock(&smcd_dev_list.lock); + + return device_add(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_register_dev); + +void smcd_unregister_dev(struct smcd_dev *smcd) +{ + spin_lock(&smcd_dev_list.lock); + list_del(&smcd->list); + spin_unlock(&smcd_dev_list.lock); + flush_workqueue(smcd->event_wq); + destroy_workqueue(smcd->event_wq); + smc_smcd_terminate(smcd, 0); + + device_del(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_unregister_dev); + +void smcd_free_dev(struct smcd_dev *smcd) +{ + put_device(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_free_dev); + +/* SMCD Device event handler. Called from ISM device interrupt handler. + * Parameters are smcd device pointer, + * - event->type (0 --> DMB, 1 --> GID), + * - event->code (event code), + * - event->tok (either DMB token when event type 0, or GID when event type 1) + * - event->time (time of day) + * - event->info (debug info). + * + * Context: + * - Function called in IRQ context from ISM device driver event handler. + */ +void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event) +{ + struct smc_ism_event_work *wrk; + + /* copy event to event work queue, and let it be handled there */ + wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC); + if (!wrk) + return; + INIT_WORK(&wrk->work, smc_ism_event_work); + wrk->smcd = smcd; + wrk->event = *event; + queue_work(smcd->event_wq, &wrk->work); +} +EXPORT_SYMBOL_GPL(smcd_handle_event); + +/* SMCD Device interrupt handler. Called from ISM device interrupt handler. + * Parameters are smcd device pointer and DMB number. Find the connection and + * schedule the tasklet for this connection. + * + * Context: + * - Function called in IRQ context from ISM device driver IRQ handler. + */ +void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) +{ + struct smc_connection *conn = NULL; + unsigned long flags; + + spin_lock_irqsave(&smcd->lock, flags); + conn = smcd->conn[dmbno]; + if (conn) + tasklet_schedule(&conn->rx_tsklet); + spin_unlock_irqrestore(&smcd->lock, flags); +} +EXPORT_SYMBOL_GPL(smcd_handle_irq); diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h new file mode 100644 index 000000000000..aee45b860b79 --- /dev/null +++ b/net/smc/smc_ism.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Shared Memory Communications Direct over ISM devices (SMC-D) + * + * SMC-D ISM device structure definitions. + * + * Copyright IBM Corp. 2018 + */ + +#ifndef SMCD_ISM_H +#define SMCD_ISM_H + +#include <linux/uio.h> + +#include "smc.h" + +struct smcd_dev_list { /* List of SMCD devices */ + struct list_head list; + spinlock_t lock; /* Protects list of devices */ +}; + +extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ + +struct smc_ism_vlanid { /* VLAN id set on ISM device */ + struct list_head list; + unsigned short vlanid; /* Vlan id */ + refcount_t refcnt; /* Reference count */ +}; + +struct smc_ism_position { /* ISM device position to write to */ + u64 token; /* Token of DMB */ + u32 offset; /* Offset into DMBE */ + u8 index; /* Index of DMBE */ + u8 signal; /* Generate interrupt on owner side */ +}; + +struct smcd_dev; + +int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); +void smc_ism_set_conn(struct smc_connection *conn); +void smc_ism_unset_conn(struct smc_connection *conn); +int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id); +int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id); +int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size, + struct smc_buf_desc *dmb_desc); +int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); +int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, + void *data, size_t len); +#endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index d7b88b2d1b22..1b6c066d3495 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -22,13 +22,12 @@ #include "smc_pnet.h" #include "smc_ib.h" - -#define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */ +#include "smc_ism.h" static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, - .len = SMC_MAX_PNET_ID_LEN - 1 + .len = SMC_MAX_PNETID_LEN - 1 }, [SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING, @@ -65,7 +64,7 @@ static struct smc_pnettable { */ struct smc_pnetentry { struct list_head list; - char pnet_name[SMC_MAX_PNET_ID_LEN + 1]; + char pnet_name[SMC_MAX_PNETID_LEN + 1]; struct net_device *ndev; struct smc_ib_device *smcibdev; u8 ib_port; @@ -209,7 +208,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) return false; while (--end >= bf && isspace(*end)) ; - if (end - bf >= SMC_MAX_PNET_ID_LEN) + if (end - bf >= SMC_MAX_PNETID_LEN) return false; while (bf <= end) { if (!isalnum(*bf)) @@ -358,9 +357,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) kfree(pnetelem); return rc; } - rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port); - if (rc) - smc_pnet_remove_by_pnetid(pnetelem->pnet_name); return rc; } @@ -485,10 +481,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this, case NETDEV_REBOOT: case NETDEV_UNREGISTER: smc_pnet_remove_by_ndev(event_dev); + return NOTIFY_OK; default: - break; + return NOTIFY_DONE; } - return NOTIFY_DONE; } static struct notifier_block smc_netdev_notifier = { @@ -515,26 +511,91 @@ void smc_pnet_exit(void) genl_unregister_family(&smc_pnet_nl_family); } -/* PNET table analysis for a given sock: - * determine ib_device and port belonging to used internal TCP socket - * ethernet interface. +/* Determine one base device for stacked net devices. + * If the lower device level contains more than one devices + * (for instance with bonding slaves), just the first device + * is used to reach a base device. */ -void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport) +static struct net_device *pnet_find_base_ndev(struct net_device *ndev) { - struct dst_entry *dst = sk_dst_get(sk); - struct smc_pnetentry *pnetelem; + int i, nest_lvl; - *smcibdev = NULL; - *ibport = 0; + rtnl_lock(); + nest_lvl = dev_get_nest_level(ndev); + for (i = 0; i < nest_lvl; i++) { + struct list_head *lower = &ndev->adj_list.lower; + + if (list_empty(lower)) + break; + lower = lower->next; + ndev = netdev_lower_get_next(ndev, &lower); + } + rtnl_unlock(); + return ndev; +} + +/* Determine the corresponding IB device port based on the hardware PNETID. + * Searching stops at the first matching active IB device port. + */ +static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, + struct smc_ib_device **smcibdev, + u8 *ibport) +{ + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct smc_ib_device *ibdev; + int i; + + ndev = pnet_find_base_ndev(ndev); + if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + ndev_pnetid)) + return; /* pnetid could not be determined */ + + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(ibdev, &smc_ib_devices.list, list) { + for (i = 1; i <= SMC_MAX_PORTS; i++) { + if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid, + SMC_MAX_PNETID_LEN) && + smc_ib_port_active(ibdev, i)) { + *smcibdev = ibdev; + *ibport = i; + break; + } + } + } + spin_unlock(&smc_ib_devices.lock); +} + +static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, + struct smcd_dev **smcismdev) +{ + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct smcd_dev *ismdev; + + ndev = pnet_find_base_ndev(ndev); + if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + ndev_pnetid)) + return; /* pnetid could not be determined */ + + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(ismdev, &smcd_dev_list.list, list) { + if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) { + *smcismdev = ismdev; + break; + } + } + spin_unlock(&smcd_dev_list.lock); +} + +/* Lookup of coupled ib_device via SMC pnet table */ +static void smc_pnet_find_roce_by_table(struct net_device *netdev, + struct smc_ib_device **smcibdev, + u8 *ibport) +{ + struct smc_pnetentry *pnetelem; - if (!dst) - return; - if (!dst->dev) - goto out_rel; read_lock(&smc_pnettable.lock); list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (dst->dev == pnetelem->ndev) { + if (netdev == pnetelem->ndev) { if (smc_ib_port_active(pnetelem->smcibdev, pnetelem->ib_port)) { *smcibdev = pnetelem->smcibdev; @@ -544,6 +605,54 @@ void smc_pnet_find_roce_resource(struct sock *sk, } } read_unlock(&smc_pnettable.lock); +} + +/* PNET table analysis for a given sock: + * determine ib_device and port belonging to used internal TCP socket + * ethernet interface. + */ +void smc_pnet_find_roce_resource(struct sock *sk, + struct smc_ib_device **smcibdev, u8 *ibport) +{ + struct dst_entry *dst = sk_dst_get(sk); + + *smcibdev = NULL; + *ibport = 0; + + if (!dst) + goto out; + if (!dst->dev) + goto out_rel; + + /* if possible, lookup via hardware-defined pnetid */ + smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport); + if (*smcibdev) + goto out_rel; + + /* lookup via SMC PNET table */ + smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport); + +out_rel: + dst_release(dst); +out: + return; +} + +void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev) +{ + struct dst_entry *dst = sk_dst_get(sk); + + *smcismdev = NULL; + if (!dst) + goto out; + if (!dst->dev) + goto out_rel; + + /* if possible, lookup via hardware-defined pnetid */ + smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev); + out_rel: dst_release(dst); +out: + return; } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 5a29519db976..1e94fd4df7bc 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -12,12 +12,28 @@ #ifndef _SMC_PNET_H #define _SMC_PNET_H +#if IS_ENABLED(CONFIG_HAVE_PNETID) +#include <asm/pnet.h> +#endif + struct smc_ib_device; +struct smcd_dev; + +static inline int smc_pnetid_by_dev_port(struct device *dev, + unsigned short port, u8 *pnetid) +{ +#if IS_ENABLED(CONFIG_HAVE_PNETID) + return pnet_id_by_dev_port(dev, port, pnetid); +#else + return -ENOENT; +#endif +} int smc_pnet_init(void) __init; void smc_pnet_exit(void); int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); void smc_pnet_find_roce_resource(struct sock *sk, struct smc_ib_device **smcibdev, u8 *ibport); +void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev); #endif diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 3d77b383cccd..b329803c8339 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -305,7 +305,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */ - rcvbuf_base = conn->rmb_desc->cpu_addr; + rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr; do { /* while (read_remaining) */ if (read_done >= target || (pipe && read_done)) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index cee666400752..142bcb134dd6 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -24,6 +24,7 @@ #include "smc.h" #include "smc_wr.h" #include "smc_cdc.h" +#include "smc_ism.h" #include "smc_tx.h" #define SMC_TX_WORK_DELAY HZ @@ -250,6 +251,24 @@ out_err: /***************************** sndbuf consumer *******************************/ +/* sndbuf consumer: actual data transfer of one target chunk with ISM write */ +int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, + u32 offset, int signal) +{ + struct smc_ism_position pos; + int rc; + + memset(&pos, 0, sizeof(pos)); + pos.token = conn->peer_token; + pos.index = conn->peer_rmbe_idx; + pos.offset = conn->tx_off + offset; + pos.signal = signal; + rc = smc_ism_write(conn->lgr->smcd, &pos, data, len); + if (rc) + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + return rc; +} + /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, int num_sges, struct ib_sge sges[]) @@ -297,21 +316,104 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn, smc_curs_add(conn->sndbuf_desc->len, sent, len); } +/* SMC-R helper for smc_tx_rdma_writes() */ +static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, + size_t src_off, size_t src_len, + size_t dst_off, size_t dst_len) +{ + dma_addr_t dma_addr = + sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); + struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + int src_len_sum = src_len, dst_len_sum = dst_len; + struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; + int sent_count = src_off; + int srcchunk, dstchunk; + int num_sges; + int rc; + + for (dstchunk = 0; dstchunk < 2; dstchunk++) { + num_sges = 0; + for (srcchunk = 0; srcchunk < 2; srcchunk++) { + sges[srcchunk].addr = dma_addr + src_off; + sges[srcchunk].length = src_len; + sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; + num_sges++; + + src_off += src_len; + if (src_off >= conn->sndbuf_desc->len) + src_off -= conn->sndbuf_desc->len; + /* modulo in send ring */ + if (src_len_sum == dst_len) + break; /* either on 1st or 2nd iteration */ + /* prepare next (== 2nd) iteration */ + src_len = dst_len - src_len; /* remainder */ + src_len_sum += src_len; + } + rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); + if (rc) + return rc; + if (dst_len_sum == len) + break; /* either on 1st or 2nd iteration */ + /* prepare next (== 2nd) iteration */ + dst_off = 0; /* modulo offset in RMBE ring buffer */ + dst_len = len - dst_len; /* remainder */ + dst_len_sum += dst_len; + src_len = min_t(int, dst_len, conn->sndbuf_desc->len - + sent_count); + src_len_sum = src_len; + } + return 0; +} + +/* SMC-D helper for smc_tx_rdma_writes() */ +static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, + size_t src_off, size_t src_len, + size_t dst_off, size_t dst_len) +{ + int src_len_sum = src_len, dst_len_sum = dst_len; + int srcchunk, dstchunk; + int rc; + + for (dstchunk = 0; dstchunk < 2; dstchunk++) { + for (srcchunk = 0; srcchunk < 2; srcchunk++) { + void *data = conn->sndbuf_desc->cpu_addr + src_off; + + rc = smcd_tx_ism_write(conn, data, src_len, dst_off + + sizeof(struct smcd_cdc_msg), 0); + if (rc) + return rc; + dst_off += src_len; + src_off += src_len; + if (src_off >= conn->sndbuf_desc->len) + src_off -= conn->sndbuf_desc->len; + /* modulo in send ring */ + if (src_len_sum == dst_len) + break; /* either on 1st or 2nd iteration */ + /* prepare next (== 2nd) iteration */ + src_len = dst_len - src_len; /* remainder */ + src_len_sum += src_len; + } + if (dst_len_sum == len) + break; /* either on 1st or 2nd iteration */ + /* prepare next (== 2nd) iteration */ + dst_off = 0; /* modulo offset in RMBE ring buffer */ + dst_len = len - dst_len; /* remainder */ + dst_len_sum += dst_len; + src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off); + src_len_sum = src_len; + } + return 0; +} + /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; * usable snd_wnd as max transmit */ static int smc_tx_rdma_writes(struct smc_connection *conn) { - size_t src_off, src_len, dst_off, dst_len; /* current chunk values */ - size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk; + size_t len, src_len, dst_off, dst_len; /* current chunk values */ union smc_host_cursor sent, prep, prod, cons; - struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; - struct smc_link_group *lgr = conn->lgr; struct smc_cdc_producer_flags *pflags; int to_send, rmbespace; - struct smc_link *link; - dma_addr_t dma_addr; - int num_sges; int rc; /* source: sndbuf */ @@ -341,7 +443,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) len = min(to_send, rmbespace); /* initialize variables for first iteration of subsequent nested loop */ - link = &lgr->lnk[SMC_SINGLE_LINK]; dst_off = prod.count; if (prod.wrap == cons.wrap) { /* the filled destination area is unwrapped, @@ -358,8 +459,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) */ dst_len = len; } - dst_len_sum = dst_len; - src_off = sent.count; /* dst_len determines the maximum src_len */ if (sent.count + dst_len <= conn->sndbuf_desc->len) { /* unwrapped src case: single chunk of entire dst_len */ @@ -368,38 +467,15 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */ src_len = conn->sndbuf_desc->len - sent.count; } - src_len_sum = src_len; - dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); - for (dstchunk = 0; dstchunk < 2; dstchunk++) { - num_sges = 0; - for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = dma_addr + src_off; - sges[srcchunk].length = src_len; - sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; - num_sges++; - src_off += src_len; - if (src_off >= conn->sndbuf_desc->len) - src_off -= conn->sndbuf_desc->len; - /* modulo in send ring */ - if (src_len_sum == dst_len) - break; /* either on 1st or 2nd iteration */ - /* prepare next (== 2nd) iteration */ - src_len = dst_len - src_len; /* remainder */ - src_len_sum += src_len; - } - rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); - if (rc) - return rc; - if (dst_len_sum == len) - break; /* either on 1st or 2nd iteration */ - /* prepare next (== 2nd) iteration */ - dst_off = 0; /* modulo offset in RMBE ring buffer */ - dst_len = len - dst_len; /* remainder */ - dst_len_sum += dst_len; - src_len = min_t(int, - dst_len, conn->sndbuf_desc->len - sent.count); - src_len_sum = src_len; - } + + if (conn->lgr->is_smcd) + rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len, + dst_off, dst_len); + else + rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, + dst_off, dst_len); + if (rc) + return rc; if (conn->urg_tx_pend && len == to_send) pflags->urg_data_present = 1; @@ -420,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) /* Wakeup sndbuf consumers from any context (IRQ or process) * since there is more data to transmit; usable snd_wnd as max transmit */ -int smc_tx_sndbuf_nonempty(struct smc_connection *conn) +static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags; struct smc_cdc_tx_pend *pend; @@ -467,6 +543,37 @@ out_unlock: return rc; } +static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) +{ + struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; + int rc = 0; + + spin_lock_bh(&conn->send_lock); + if (!pflags->urg_data_present) + rc = smc_tx_rdma_writes(conn); + if (!rc) + rc = smcd_cdc_msg_send(conn); + + if (!rc && pflags->urg_data_present) { + pflags->urg_data_pending = 0; + pflags->urg_data_present = 0; + } + spin_unlock_bh(&conn->send_lock); + return rc; +} + +int smc_tx_sndbuf_nonempty(struct smc_connection *conn) +{ + int rc; + + if (conn->lgr->is_smcd) + rc = smcd_tx_sndbuf_nonempty(conn); + else + rc = smcr_tx_sndbuf_nonempty(conn); + + return rc; +} + /* Wakeup sndbuf consumers from process context * since there is more data to transmit */ @@ -495,7 +602,8 @@ out: void smc_tx_consumer_update(struct smc_connection *conn, bool force) { - union smc_host_cursor cfed, cons; + union smc_host_cursor cfed, cons, prod; + int sender_free = conn->rmb_desc->len; int to_confirm; smc_curs_write(&cons, @@ -505,11 +613,18 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) smc_curs_read(&conn->rx_curs_confirmed, conn), conn); to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons); + if (to_confirm > conn->rmbe_update_limit) { + smc_curs_write(&prod, + smc_curs_read(&conn->local_rx_ctrl.prod, conn), + conn); + sender_free = conn->rmb_desc->len - + smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); + } if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || force || ((to_confirm > conn->rmbe_update_limit) && - ((to_confirm > (conn->rmb_desc->len / 2)) || + ((sender_free <= (conn->rmb_desc->len / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && conn->alert_token_local) { /* connection healthy */ diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index 9d2238909fa0..b22bdc5694c4 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -33,5 +33,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len); int smc_tx_sndbuf_nonempty(struct smc_connection *conn); void smc_tx_sndbuf_nonfull(struct smc_sock *smc); void smc_tx_consumer_update(struct smc_connection *conn, bool force); +int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, + u32 offset, int signal); #endif /* SMC_TX_H */ diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 625acb27efcc..3a512936eea9 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -140,11 +140,13 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - STRP_STATS_INCR(strp->stats.mem_fail); - desc->error = err; - return 0; + if (skb_has_frag_list(head)) { + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.mem_fail); + desc->error = err; + return 0; + } } if (unlikely(skb_shinfo(head)->frag_list)) { @@ -201,14 +203,16 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, memset(stm, 0, sizeof(*stm)); stm->strp.offset = orig_offset + eaten; } else { - /* Unclone since we may be appending to an skb that we + /* Unclone if we are appending to an skb that we * already share a frag_list with. */ - err = skb_unclone(skb, GFP_ATOMIC); - if (err) { - STRP_STATS_INCR(strp->stats.mem_fail); - desc->error = err; - break; + if (skb_has_frag_list(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.mem_fail); + desc->error = err; + break; + } } stm = _strp_msg(head); diff --git a/net/tipc/group.c b/net/tipc/group.c index d7a7befeddd4..cbe39e8db39c 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -918,3 +918,35 @@ void tipc_group_member_evt(struct tipc_group *grp, } *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); } + +int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) +{ + struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP); + + if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID, + grp->type) || + nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE, + grp->instance) || + nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT, + grp->bc_snd_nxt)) + goto group_msg_cancel; + + if (grp->scope == TIPC_NODE_SCOPE) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_NODE_SCOPE)) + goto group_msg_cancel; + + if (grp->scope == TIPC_CLUSTER_SCOPE) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE)) + goto group_msg_cancel; + + if (*grp->open) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_OPEN)) + goto group_msg_cancel; + + nla_nest_end(skb, group); + return 0; + +group_msg_cancel: + nla_nest_cancel(skb, group); + return -1; +} diff --git a/net/tipc/group.h b/net/tipc/group.h index 5996af6e9f1d..76b4e5a7b39d 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -72,4 +72,5 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); void tipc_group_update_member(struct tipc_member *m, int len); +int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb); #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b6c45dccba3d..b61891054709 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -416,26 +416,31 @@ bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu) */ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) { - struct tipc_msg *msg; - int imsz, offset; + struct tipc_msg *hdr, *ihdr; + int imsz; *iskb = NULL; if (unlikely(skb_linearize(skb))) goto none; - msg = buf_msg(skb); - offset = msg_hdr_sz(msg) + *pos; - if (unlikely(offset > (msg_size(msg) - MIN_H_SIZE))) + hdr = buf_msg(skb); + if (unlikely(*pos > (msg_data_sz(hdr) - MIN_H_SIZE))) goto none; - *iskb = skb_clone(skb, GFP_ATOMIC); - if (unlikely(!*iskb)) + ihdr = (struct tipc_msg *)(msg_data(hdr) + *pos); + imsz = msg_size(ihdr); + + if ((*pos + imsz) > msg_data_sz(hdr)) goto none; - skb_pull(*iskb, offset); - imsz = msg_size(buf_msg(*iskb)); - skb_trim(*iskb, imsz); + + *iskb = tipc_buf_acquire(imsz, GFP_ATOMIC); + if (!*iskb) + goto none; + + skb_copy_to_linear_data(*iskb, ihdr, imsz); if (unlikely(!tipc_msg_validate(iskb))) goto none; + *pos += align(imsz); return true; none: @@ -531,12 +536,6 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) msg_set_hdr_sz(hdr, BASIC_H_SIZE); } - if (skb_cloned(_skb) && - pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) - goto exit; - - /* reassign after skb header modifications */ - hdr = buf_msg(_skb); /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_non_seq(hdr, 0); @@ -595,10 +594,6 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) if (!skb_cloned(skb)) return true; - /* Unclone buffer in case it was bundled */ - if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) - return false; - return true; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 6a44eb812baf..cfdbaf479fd1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -45,6 +45,7 @@ #include "netlink.h" #define INVALID_NODE_SIG 0x10000 +#define NODE_CLEANUP_AFTER 300000 /* Flags used to take different actions according to flag type * TIPC_NOTIFY_NODE_DOWN: notify node is down @@ -96,6 +97,7 @@ struct tipc_bclink_entry { * @link_id: local and remote bearer ids of changing link, if any * @publ_list: list of publications * @rcu: rcu struct for tipc_node + * @delete_at: indicates the time for deleting a down node */ struct tipc_node { u32 addr; @@ -121,6 +123,7 @@ struct tipc_node { unsigned long keepalive_intv; struct timer_list timer; struct rcu_head rcu; + unsigned long delete_at; }; /* Node FSM states and events: @@ -160,6 +163,7 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr); static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id); static void tipc_node_put(struct tipc_node *node); static bool node_is_up(struct tipc_node *n); +static void tipc_node_delete_from_list(struct tipc_node *node); struct tipc_sock_conn { u32 port; @@ -390,6 +394,7 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, for (i = 0; i < MAX_BEARERS; i++) spin_lock_init(&n->links[i].lock); n->state = SELF_DOWN_PEER_LEAVING; + n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; @@ -433,11 +438,16 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) tipc_link_set_abort_limit(l, tol / n->keepalive_intv); } -static void tipc_node_delete(struct tipc_node *node) +static void tipc_node_delete_from_list(struct tipc_node *node) { list_del_rcu(&node->list); hlist_del_rcu(&node->hash); tipc_node_put(node); +} + +static void tipc_node_delete(struct tipc_node *node) +{ + tipc_node_delete_from_list(node); del_timer_sync(&node->timer); tipc_node_put(node); @@ -544,6 +554,42 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) tipc_node_put(node); } +static void tipc_node_clear_links(struct tipc_node *node) +{ + int i; + + for (i = 0; i < MAX_BEARERS; i++) { + struct tipc_link_entry *le = &node->links[i]; + + if (le->link) { + kfree(le->link); + le->link = NULL; + node->link_cnt--; + } + } +} + +/* tipc_node_cleanup - delete nodes that does not + * have active links for NODE_CLEANUP_AFTER time + */ +static int tipc_node_cleanup(struct tipc_node *peer) +{ + struct tipc_net *tn = tipc_net(peer->net); + bool deleted = false; + + spin_lock_bh(&tn->node_list_lock); + tipc_node_write_lock(peer); + + if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) { + tipc_node_clear_links(peer); + tipc_node_delete_from_list(peer); + deleted = true; + } + tipc_node_write_unlock(peer); + spin_unlock_bh(&tn->node_list_lock); + return deleted; +} + /* tipc_node_timeout - handle expiration of node timer */ static void tipc_node_timeout(struct timer_list *t) @@ -551,21 +597,29 @@ static void tipc_node_timeout(struct timer_list *t) struct tipc_node *n = from_timer(n, t, timer); struct tipc_link_entry *le; struct sk_buff_head xmitq; + int remains = n->link_cnt; int bearer_id; int rc = 0; + if (!node_is_up(n) && tipc_node_cleanup(n)) { + /*Removing the reference of Timer*/ + tipc_node_put(n); + return; + } + __skb_queue_head_init(&xmitq); - for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) { tipc_node_read_lock(n); le = &n->links[bearer_id]; - spin_lock_bh(&le->lock); if (le->link) { + spin_lock_bh(&le->lock); /* Link tolerance may change asynchronously: */ tipc_node_calculate_timer(n, le->link); rc = tipc_link_timeout(le->link, &xmitq); + spin_unlock_bh(&le->lock); + remains--; } - spin_unlock_bh(&le->lock); tipc_node_read_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); if (rc & TIPC_LINK_DOWN_EVT) @@ -1171,6 +1225,7 @@ static void node_lost_contact(struct tipc_node *n, uint i; pr_debug("Lost contact with %x\n", n->addr); + n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); /* Clean up broadcast state */ tipc_bcast_remove_peer(n->net, n->bc_entry.link); @@ -1740,7 +1795,6 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) struct tipc_node *peer; u32 addr; int err; - int i; /* We identify the peer by its net */ if (!info->attrs[TIPC_NLA_NET]) @@ -1775,15 +1829,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) goto err_out; } - for (i = 0; i < MAX_BEARERS; i++) { - struct tipc_link_entry *le = &peer->links[i]; - - if (le->link) { - kfree(le->link); - le->link = NULL; - peer->link_cnt--; - } - } + tipc_node_clear_links(peer); tipc_node_write_unlock(peer); tipc_node_delete(peer); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 930852c54d7a..3d21414ba357 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3320,6 +3320,11 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, goto stat_msg_cancel; nla_nest_end(skb, stat); + + if (tsk->group) + if (tipc_group_fill_sock_diag(tsk->group, skb)) + goto stat_msg_cancel; + nla_nest_end(skb, attrs); return 0; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d2380548f8f6..0d670c8adf18 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -942,7 +942,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - char header[tls_ctx->rx.prepend_size]; + char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; struct strp_msg *rxm = strp_msg(skb); size_t cipher_overhead; size_t data_len = 0; @@ -952,6 +952,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) if (rxm->offset + tls_ctx->rx.prepend_size > skb->len) return 0; + /* Sanity-check size of on-stack buffer. */ + if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) { + ret = -EINVAL; + goto read_failure; + } + /* Linearize header to local buffer */ ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size); @@ -991,9 +997,6 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - struct strp_msg *rxm; - - rxm = strp_msg(skb); ctx->decrypted = false; @@ -1112,7 +1115,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } /* Sanity-check the IV size for stack allocations. */ - if (iv_size > MAX_IV_SIZE) { + if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE) { rc = -EINVAL; goto free_priv; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 48e8097339ab..a88551f3bc43 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -3,7 +3,7 @@ * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -744,6 +744,8 @@ int wiphy_register(struct wiphy *wiphy) /* sanity check supported bands/channels */ for (band = 0; band < NUM_NL80211_BANDS; band++) { + u16 types = 0; + sband = wiphy->bands[band]; if (!sband) continue; @@ -788,6 +790,23 @@ int wiphy_register(struct wiphy *wiphy) sband->channels[i].band = band; } + for (i = 0; i < sband->n_iftype_data; i++) { + const struct ieee80211_sband_iftype_data *iftd; + + iftd = &sband->iftype_data[i]; + + if (WARN_ON(!iftd->types_mask)) + return -EINVAL; + if (WARN_ON(types & iftd->types_mask)) + return -EINVAL; + + /* at least one piece of information must be present */ + if (WARN_ON(!iftd->he_cap.has_he)) + return -EINVAL; + + types |= iftd->types_mask; + } + have_band = true; } diff --git a/net/wireless/core.h b/net/wireless/core.h index 63eb1b5fdd04..7f52ef569320 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -76,7 +76,7 @@ struct cfg80211_registered_device { struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; struct list_head sched_scan_req_list; - unsigned long suspend_at; + time64_t suspend_at; struct work_struct scan_done_wk; struct genl_info *cur_cmd_info; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4eece06be1e7..e4e5f025d16b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -428,6 +428,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 }, [NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 }, [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 }, + [NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY, + .len = NL80211_HE_MAX_CAPABILITY_LEN }, }; /* policy for the key attributes */ @@ -1324,6 +1326,34 @@ static int nl80211_send_coalesce(struct sk_buff *msg, return 0; } +static int +nl80211_send_iftype_data(struct sk_buff *msg, + const struct ieee80211_sband_iftype_data *iftdata) +{ + const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap; + + if (nl80211_put_iftypes(msg, NL80211_BAND_IFTYPE_ATTR_IFTYPES, + iftdata->types_mask)) + return -ENOBUFS; + + if (he_cap->has_he) { + if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC, + sizeof(he_cap->he_cap_elem.mac_cap_info), + he_cap->he_cap_elem.mac_cap_info) || + nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY, + sizeof(he_cap->he_cap_elem.phy_cap_info), + he_cap->he_cap_elem.phy_cap_info) || + nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET, + sizeof(he_cap->he_mcs_nss_supp), + &he_cap->he_mcs_nss_supp) || + nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, + sizeof(he_cap->ppe_thres), he_cap->ppe_thres)) + return -ENOBUFS; + } + + return 0; +} + static int nl80211_send_band_rateinfo(struct sk_buff *msg, struct ieee80211_supported_band *sband) { @@ -1353,6 +1383,32 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, sband->vht_cap.cap))) return -ENOBUFS; + if (sband->n_iftype_data) { + struct nlattr *nl_iftype_data = + nla_nest_start(msg, NL80211_BAND_ATTR_IFTYPE_DATA); + int err; + + if (!nl_iftype_data) + return -ENOBUFS; + + for (i = 0; i < sband->n_iftype_data; i++) { + struct nlattr *iftdata; + + iftdata = nla_nest_start(msg, i + 1); + if (!iftdata) + return -ENOBUFS; + + err = nl80211_send_iftype_data(msg, + &sband->iftype_data[i]); + if (err) + return err; + + nla_nest_end(msg, iftdata); + } + + nla_nest_end(msg, nl_iftype_data); + } + /* add bitrates */ nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); if (!nl_rates) @@ -2757,7 +2813,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ - (cfg80211_rdev_list_generation << 2))) + (cfg80211_rdev_list_generation << 2)) || + nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr)) goto nla_put_failure; if (rdev->ops->get_channel) { @@ -4471,6 +4528,9 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, case RATE_INFO_BW_160: rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH; break; + case RATE_INFO_BW_HE_RU: + rate_flg = 0; + WARN_ON(!(info->flags & RATE_INFO_FLAGS_HE_MCS)); } if (rate_flg && nla_put_flag(msg, rate_flg)) @@ -4490,6 +4550,19 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; + } else if (info->flags & RATE_INFO_FLAGS_HE_MCS) { + if (nla_put_u8(msg, NL80211_RATE_INFO_HE_MCS, info->mcs)) + return false; + if (nla_put_u8(msg, NL80211_RATE_INFO_HE_NSS, info->nss)) + return false; + if (nla_put_u8(msg, NL80211_RATE_INFO_HE_GI, info->he_gi)) + return false; + if (nla_put_u8(msg, NL80211_RATE_INFO_HE_DCM, info->he_dcm)) + return false; + if (info->bw == RATE_INFO_BW_HE_RU && + nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC, + info->he_ru_alloc)) + return false; } nla_nest_end(msg, rate); @@ -4546,13 +4619,13 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, #define PUT_SINFO(attr, memb, type) do { \ BUILD_BUG_ON(sizeof(type) == sizeof(u64)); \ - if (sinfo->filled & (1ULL << NL80211_STA_INFO_ ## attr) && \ + if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) && \ nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \ sinfo->memb)) \ goto nla_put_failure; \ } while (0) #define PUT_SINFO_U64(attr, memb) do { \ - if (sinfo->filled & (1ULL << NL80211_STA_INFO_ ## attr) && \ + if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) && \ nla_put_u64_64bit(msg, NL80211_STA_INFO_ ## attr, \ sinfo->memb, NL80211_STA_INFO_PAD)) \ goto nla_put_failure; \ @@ -4561,14 +4634,14 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(CONNECTED_TIME, connected_time, u32); PUT_SINFO(INACTIVE_TIME, inactive_time, u32); - if (sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES) | - BIT(NL80211_STA_INFO_RX_BYTES64)) && + if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) | + BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, (u32)sinfo->rx_bytes)) goto nla_put_failure; - if (sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES) | - BIT(NL80211_STA_INFO_TX_BYTES64)) && + if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES) | + BIT_ULL(NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, (u32)sinfo->tx_bytes)) goto nla_put_failure; @@ -4588,24 +4661,24 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, default: break; } - if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL)) { + if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal, NL80211_STA_INFO_CHAIN_SIGNAL)) goto nla_put_failure; } - if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) { + if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal_avg, NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) goto nla_put_failure; } - if (sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { + if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, NL80211_STA_INFO_TX_BITRATE)) goto nla_put_failure; } - if (sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE)) { + if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->rxrate, NL80211_STA_INFO_RX_BITRATE)) goto nla_put_failure; @@ -4621,7 +4694,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(PEER_PM, peer_pm, u32); PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); - if (sinfo->filled & BIT(NL80211_STA_INFO_BSS_PARAM)) { + if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) goto nla_put_failure; @@ -4640,7 +4713,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, nla_nest_end(msg, bss_param); } - if ((sinfo->filled & BIT(NL80211_STA_INFO_STA_FLAGS)) && + if ((sinfo->filled & BIT_ULL(NL80211_STA_INFO_STA_FLAGS)) && nla_put(msg, NL80211_STA_INFO_STA_FLAGS, sizeof(struct nl80211_sta_flag_update), &sinfo->sta_flags)) @@ -4886,7 +4959,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy, return -EINVAL; if (params->supported_rates) return -EINVAL; - if (params->ext_capab || params->ht_capa || params->vht_capa) + if (params->ext_capab || params->ht_capa || params->vht_capa || + params->he_capa) return -EINVAL; } @@ -5092,6 +5166,15 @@ static int nl80211_set_station_tdls(struct genl_info *info, if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) params->vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) { + params->he_capa = + nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]); + params->he_capa_len = + nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]); + + if (params->he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN) + return -EINVAL; + } err = nl80211_parse_sta_channel_info(info, params); if (err) @@ -5319,6 +5402,17 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) { + params.he_capa = + nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]); + params.he_capa_len = + nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]); + + /* max len is validated in nla policy */ + if (params.he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN) + return -EINVAL; + } + if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { params.opmode_notif_used = true; params.opmode_notif = @@ -5351,6 +5445,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) { params.ht_capa = NULL; params.vht_capa = NULL; + + /* HE requires WME */ + if (params.he_capa_len) + return -EINVAL; } /* When you run into this, adjust the code below for the new flag */ @@ -6848,6 +6946,16 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev) return regulatory_pre_cac_allowed(wdev->wiphy); } +static bool nl80211_check_scan_feat(struct wiphy *wiphy, u32 flags, u32 flag, + enum nl80211_ext_feature_index feat) +{ + if (!(flags & flag)) + return true; + if (wiphy_ext_feature_isset(wiphy, feat)) + return true; + return false; +} + static int nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, void *request, struct nlattr **attrs, @@ -6882,15 +6990,33 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || - ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) && - !wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) || - ((*flags & NL80211_SCAN_FLAG_LOW_POWER) && - !wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_LOW_POWER_SCAN)) || - ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) && - !wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN))) + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_LOW_SPAN, + NL80211_EXT_FEATURE_LOW_SPAN_SCAN) || + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_LOW_POWER, + NL80211_EXT_FEATURE_LOW_POWER_SCAN) || + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_HIGH_ACCURACY, + NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN) || + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME, + NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME) || + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP, + NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP) || + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) || + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE) || + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_RANDOM_SN, + NL80211_EXT_FEATURE_SCAN_RANDOM_SN) || + !nl80211_check_scan_feat(wiphy, *flags, + NL80211_SCAN_FLAG_MIN_PREQ_CONTENT, + NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT)) return -EOPNOTSUPP; if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { @@ -6905,26 +7031,6 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, return err; } - if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) && - !wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME)) - return -EOPNOTSUPP; - - if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) && - !wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP)) - return -EOPNOTSUPP; - - if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) && - !wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION)) - return -EOPNOTSUPP; - - if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) && - !wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE)) - return -EOPNOTSUPP; - return 0; } @@ -10147,7 +10253,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (err) return err; - if (sinfo.filled & BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG)) + if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG)) wdev->cqm_config->last_rssi_event_value = (s8) sinfo.rx_beacon_signal_avg; } diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 570a2b67ca10..6ab32f6a1961 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -102,7 +102,7 @@ static int wiphy_suspend(struct device *dev) struct cfg80211_registered_device *rdev = dev_to_rdev(dev); int ret = 0; - rdev->suspend_at = get_seconds(); + rdev->suspend_at = ktime_get_boottime_seconds(); rtnl_lock(); if (rdev->wiphy.registered) { @@ -130,7 +130,7 @@ static int wiphy_resume(struct device *dev) int ret = 0; /* Age scan results with time spent in suspend */ - cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); + cfg80211_bss_age(rdev, ktime_get_boottime_seconds() - rdev->suspend_at); rtnl_lock(); if (rdev->wiphy.registered && rdev->ops->resume) diff --git a/net/wireless/util.c b/net/wireless/util.c index 3c654cd7ba56..e0825a019e9f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -4,6 +4,7 @@ * * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2017 Intel Deutschland GmbH */ #include <linux/export.h> #include <linux/bitops.h> @@ -1142,6 +1143,85 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) return 0; } +static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) +{ +#define SCALE 2048 + u16 mcs_divisors[12] = { + 34133, /* 16.666666... */ + 17067, /* 8.333333... */ + 11378, /* 5.555555... */ + 8533, /* 4.166666... */ + 5689, /* 2.777777... */ + 4267, /* 2.083333... */ + 3923, /* 1.851851... */ + 3413, /* 1.666666... */ + 2844, /* 1.388888... */ + 2560, /* 1.250000... */ + 2276, /* 1.111111... */ + 2048, /* 1.000000... */ + }; + u32 rates_160M[3] = { 960777777, 907400000, 816666666 }; + u32 rates_969[3] = { 480388888, 453700000, 408333333 }; + u32 rates_484[3] = { 229411111, 216666666, 195000000 }; + u32 rates_242[3] = { 114711111, 108333333, 97500000 }; + u32 rates_106[3] = { 40000000, 37777777, 34000000 }; + u32 rates_52[3] = { 18820000, 17777777, 16000000 }; + u32 rates_26[3] = { 9411111, 8888888, 8000000 }; + u64 tmp; + u32 result; + + if (WARN_ON_ONCE(rate->mcs > 11)) + return 0; + + if (WARN_ON_ONCE(rate->he_gi > NL80211_RATE_INFO_HE_GI_3_2)) + return 0; + if (WARN_ON_ONCE(rate->he_ru_alloc > + NL80211_RATE_INFO_HE_RU_ALLOC_2x996)) + return 0; + if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8)) + return 0; + + if (rate->bw == RATE_INFO_BW_160) + result = rates_160M[rate->he_gi]; + else if (rate->bw == RATE_INFO_BW_80 || + (rate->bw == RATE_INFO_BW_HE_RU && + rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_996)) + result = rates_969[rate->he_gi]; + else if (rate->bw == RATE_INFO_BW_40 || + (rate->bw == RATE_INFO_BW_HE_RU && + rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_484)) + result = rates_484[rate->he_gi]; + else if (rate->bw == RATE_INFO_BW_20 || + (rate->bw == RATE_INFO_BW_HE_RU && + rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_242)) + result = rates_242[rate->he_gi]; + else if (rate->bw == RATE_INFO_BW_HE_RU && + rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_106) + result = rates_106[rate->he_gi]; + else if (rate->bw == RATE_INFO_BW_HE_RU && + rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_52) + result = rates_52[rate->he_gi]; + else if (rate->bw == RATE_INFO_BW_HE_RU && + rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26) + result = rates_26[rate->he_gi]; + else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", + rate->bw, rate->he_ru_alloc)) + return 0; + + /* now scale to the appropriate MCS */ + tmp = result; + tmp *= SCALE; + do_div(tmp, mcs_divisors[rate->mcs]); + result = tmp; + + /* and take NSS, DCM into account */ + result = (result * rate->nss) / 8; + if (rate->he_dcm) + result /= 2; + + return result; +} + u32 cfg80211_calculate_bitrate(struct rate_info *rate) { if (rate->flags & RATE_INFO_FLAGS_MCS) @@ -1150,6 +1230,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) return cfg80211_calculate_bitrate_60g(rate); if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) return cfg80211_calculate_bitrate_vht(rate); + if (rate->flags & RATE_INFO_FLAGS_HE_MCS) + return cfg80211_calculate_bitrate_he(rate); return rate->legacy; } @@ -1791,8 +1873,9 @@ bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp) { - sinfo->pertid = kcalloc(sizeof(*(sinfo->pertid)), - IEEE80211_NUM_TIDS + 1, gfp); + sinfo->pertid = kcalloc(IEEE80211_NUM_TIDS + 1, + sizeof(*(sinfo->pertid)), + gfp); if (!sinfo->pertid) return -ENOMEM; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 05186a47878f..167f7025ac98 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1278,7 +1278,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - if (!(sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))) + if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) return -EOPNOTSUPP; rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); @@ -1320,7 +1320,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { + if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) { int sig = sinfo.signal; wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; @@ -1334,7 +1334,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) break; } case CFG80211_SIGNAL_TYPE_UNSPEC: - if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { + if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) { wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.level = sinfo.signal; @@ -1347,9 +1347,9 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) } wstats.qual.updated |= IW_QUAL_NOISE_INVALID; - if (sinfo.filled & BIT(NL80211_STA_INFO_RX_DROP_MISC)) + if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC)) wstats.discard.misc = sinfo.rx_dropped_misc; - if (sinfo.filled & BIT(NL80211_STA_INFO_TX_FAILED)) + if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; return &wstats; |