diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/devmap.c | 200 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 2 |
2 files changed, 202 insertions, 0 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index a0501266bdb8..9af048a932b5 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -37,6 +37,12 @@ * notifier hook walks the map we know that new dev references can not be * added by the user because core infrastructure ensures dev_get_by_index() * calls will fail at this point. + * + * The devmap_hash type is a map type which interprets keys as ifindexes and + * indexes these using a hashmap. This allows maps that use ifindex as key to be + * densely packed instead of having holes in the lookup array for unused + * ifindexes. The setup and packet enqueue/send code is shared between the two + * types of devmap; only the lookup and insertion is different. */ #include <linux/bpf.h> #include <net/xdp.h> @@ -59,6 +65,7 @@ struct xdp_bulk_queue { struct bpf_dtab_netdev { struct net_device *dev; /* must be first member, due to tracepoint */ + struct hlist_node index_hlist; struct bpf_dtab *dtab; struct xdp_bulk_queue __percpu *bulkq; struct rcu_head rcu; @@ -70,11 +77,30 @@ struct bpf_dtab { struct bpf_dtab_netdev **netdev_map; struct list_head __percpu *flush_list; struct list_head list; + + /* these are only used for DEVMAP_HASH type maps */ + struct hlist_head *dev_index_head; + spinlock_t index_lock; + unsigned int items; + u32 n_buckets; }; static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); +static struct hlist_head *dev_map_create_hash(unsigned int entries) +{ + int i; + struct hlist_head *hash; + + hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL); + if (hash != NULL) + for (i = 0; i < entries; i++) + INIT_HLIST_HEAD(&hash[i]); + + return hash; +} + static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { int err, cpu; @@ -97,6 +123,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); cost += sizeof(struct list_head) * num_possible_cpus(); + if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); + + if (!dtab->n_buckets) /* Overflow check */ + return -EINVAL; + cost += sizeof(struct hlist_head) * dtab->n_buckets; + } + /* if map size is larger than memlock limit, reject it */ err = bpf_map_charge_init(&dtab->map.memory, cost); if (err) @@ -115,8 +149,18 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) if (!dtab->netdev_map) goto free_percpu; + if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets); + if (!dtab->dev_index_head) + goto free_map_area; + + spin_lock_init(&dtab->index_lock); + } + return 0; +free_map_area: + bpf_map_area_free(dtab->netdev_map); free_percpu: free_percpu(dtab->flush_list); free_charge: @@ -198,6 +242,7 @@ static void dev_map_free(struct bpf_map *map) free_percpu(dtab->flush_list); bpf_map_area_free(dtab->netdev_map); + kfree(dtab->dev_index_head); kfree(dtab); } @@ -218,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } +static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, + int idx) +{ + return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)]; +} + +struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct hlist_head *head = dev_map_index_hash(dtab, key); + struct bpf_dtab_netdev *dev; + + hlist_for_each_entry_rcu(dev, head, index_hlist) + if (dev->idx == key) + return dev; + + return NULL; +} + +static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, + void *next_key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + u32 idx, *next = next_key; + struct bpf_dtab_netdev *dev, *next_dev; + struct hlist_head *head; + int i = 0; + + if (!key) + goto find_first; + + idx = *(u32 *)key; + + dev = __dev_map_hash_lookup_elem(map, idx); + if (!dev) + goto find_first; + + next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)), + struct bpf_dtab_netdev, index_hlist); + + if (next_dev) { + *next = next_dev->idx; + return 0; + } + + i = idx & (dtab->n_buckets - 1); + i++; + + find_first: + for (; i < dtab->n_buckets; i++) { + head = dev_map_index_hash(dtab, i); + + next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + struct bpf_dtab_netdev, + index_hlist); + if (next_dev) { + *next = next_dev->idx; + return 0; + } + } + + return -ENOENT; +} + static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags, bool in_napi_ctx) { @@ -373,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key) return dev ? &dev->ifindex : NULL; } +static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map, + *(u32 *)key); + struct net_device *dev = obj ? obj->dev : NULL; + + return dev ? &dev->ifindex : NULL; +} + static void dev_map_flush_old(struct bpf_dtab_netdev *dev) { if (dev->dev->netdev_ops->ndo_xdp_xmit) { @@ -422,6 +540,28 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) return 0; } +static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *old_dev; + int k = *(u32 *)key; + unsigned long flags; + int ret = -ENOENT; + + spin_lock_irqsave(&dtab->index_lock, flags); + + old_dev = __dev_map_hash_lookup_elem(map, k); + if (old_dev) { + dtab->items--; + hlist_del_init_rcu(&old_dev->index_hlist); + call_rcu(&old_dev->rcu, __dev_map_entry_free); + ret = 0; + } + spin_unlock_irqrestore(&dtab->index_lock, flags); + + return ret; +} + static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, struct bpf_dtab *dtab, u32 ifindex, @@ -502,6 +642,56 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, map, key, value, map_flags); } +static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *dev, *old_dev; + u32 ifindex = *(u32 *)value; + u32 idx = *(u32 *)key; + unsigned long flags; + + if (unlikely(map_flags > BPF_EXIST || !ifindex)) + return -EINVAL; + + old_dev = __dev_map_hash_lookup_elem(map, idx); + if (old_dev && (map_flags & BPF_NOEXIST)) + return -EEXIST; + + dev = __dev_map_alloc_node(net, dtab, ifindex, idx); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + spin_lock_irqsave(&dtab->index_lock, flags); + + if (old_dev) { + hlist_del_rcu(&old_dev->index_hlist); + } else { + if (dtab->items >= dtab->map.max_entries) { + spin_unlock_irqrestore(&dtab->index_lock, flags); + call_rcu(&dev->rcu, __dev_map_entry_free); + return -E2BIG; + } + dtab->items++; + } + + hlist_add_head_rcu(&dev->index_hlist, + dev_map_index_hash(dtab, idx)); + spin_unlock_irqrestore(&dtab->index_lock, flags); + + if (old_dev) + call_rcu(&old_dev->rcu, __dev_map_entry_free); + + return 0; +} + +static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + return __dev_map_hash_update_elem(current->nsproxy->net_ns, + map, key, value, map_flags); +} + const struct bpf_map_ops dev_map_ops = { .map_alloc = dev_map_alloc, .map_free = dev_map_free, @@ -512,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = { .map_check_btf = map_check_no_btf, }; +const struct bpf_map_ops dev_map_hash_ops = { + .map_alloc = dev_map_alloc, + .map_free = dev_map_free, + .map_get_next_key = dev_map_hash_get_next_key, + .map_lookup_elem = dev_map_hash_lookup_elem, + .map_update_elem = dev_map_hash_update_elem, + .map_delete_elem = dev_map_hash_delete_elem, + .map_check_btf = map_check_no_btf, +}; + static int dev_map_notification(struct notifier_block *notifier, ulong event, void *ptr) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5900cbb966b1..cef851cd5c36 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3457,6 +3457,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, goto error; break; case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) goto error; @@ -3539,6 +3540,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, break; case BPF_FUNC_redirect_map: if (map->map_type != BPF_MAP_TYPE_DEVMAP && + map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && map->map_type != BPF_MAP_TYPE_CPUMAP && map->map_type != BPF_MAP_TYPE_XSKMAP) goto error; |