diff options
Diffstat (limited to 'net/xdp')
-rw-r--r-- | net/xdp/xdp_umem.c | 225 | ||||
-rw-r--r-- | net/xdp/xdp_umem.h | 6 | ||||
-rw-r--r-- | net/xdp/xsk.c | 220 | ||||
-rw-r--r-- | net/xdp/xsk.h | 11 | ||||
-rw-r--r-- | net/xdp/xsk_buff_pool.c | 380 | ||||
-rw-r--r-- | net/xdp/xsk_diag.c | 20 | ||||
-rw-r--r-- | net/xdp/xsk_queue.h | 18 | ||||
-rw-r--r-- | net/xdp/xskmap.c | 15 |
8 files changed, 528 insertions, 367 deletions
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index b010bfde0149..56d052bc65cb 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -23,162 +23,6 @@ static DEFINE_IDA(umem_ida); -void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) -{ - unsigned long flags; - - if (!xs->tx) - return; - - spin_lock_irqsave(&umem->xsk_tx_list_lock, flags); - list_add_rcu(&xs->list, &umem->xsk_tx_list); - spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags); -} - -void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) -{ - unsigned long flags; - - if (!xs->tx) - return; - - spin_lock_irqsave(&umem->xsk_tx_list_lock, flags); - list_del_rcu(&xs->list); - spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags); -} - -/* The umem is stored both in the _rx struct and the _tx struct as we do - * not know if the device has more tx queues than rx, or the opposite. - * This might also change during run time. - */ -static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, - u16 queue_id) -{ - if (queue_id >= max_t(unsigned int, - dev->real_num_rx_queues, - dev->real_num_tx_queues)) - return -EINVAL; - - if (queue_id < dev->real_num_rx_queues) - dev->_rx[queue_id].umem = umem; - if (queue_id < dev->real_num_tx_queues) - dev->_tx[queue_id].umem = umem; - - return 0; -} - -struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, - u16 queue_id) -{ - if (queue_id < dev->real_num_rx_queues) - return dev->_rx[queue_id].umem; - if (queue_id < dev->real_num_tx_queues) - return dev->_tx[queue_id].umem; - - return NULL; -} -EXPORT_SYMBOL(xdp_get_umem_from_qid); - -static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id) -{ - if (queue_id < dev->real_num_rx_queues) - dev->_rx[queue_id].umem = NULL; - if (queue_id < dev->real_num_tx_queues) - dev->_tx[queue_id].umem = NULL; -} - -int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, - u16 queue_id, u16 flags) -{ - bool force_zc, force_copy; - struct netdev_bpf bpf; - int err = 0; - - ASSERT_RTNL(); - - force_zc = flags & XDP_ZEROCOPY; - force_copy = flags & XDP_COPY; - - if (force_zc && force_copy) - return -EINVAL; - - if (xdp_get_umem_from_qid(dev, queue_id)) - return -EBUSY; - - err = xdp_reg_umem_at_qid(dev, umem, queue_id); - if (err) - return err; - - umem->dev = dev; - umem->queue_id = queue_id; - - if (flags & XDP_USE_NEED_WAKEUP) { - umem->flags |= XDP_UMEM_USES_NEED_WAKEUP; - /* Tx needs to be explicitly woken up the first time. - * Also for supporting drivers that do not implement this - * feature. They will always have to call sendto(). - */ - xsk_set_tx_need_wakeup(umem); - } - - dev_hold(dev); - - if (force_copy) - /* For copy-mode, we are done. */ - return 0; - - if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) { - err = -EOPNOTSUPP; - goto err_unreg_umem; - } - - bpf.command = XDP_SETUP_XSK_UMEM; - bpf.xsk.umem = umem; - bpf.xsk.queue_id = queue_id; - - err = dev->netdev_ops->ndo_bpf(dev, &bpf); - if (err) - goto err_unreg_umem; - - umem->zc = true; - return 0; - -err_unreg_umem: - if (!force_zc) - err = 0; /* fallback to copy mode */ - if (err) - xdp_clear_umem_at_qid(dev, queue_id); - return err; -} - -void xdp_umem_clear_dev(struct xdp_umem *umem) -{ - struct netdev_bpf bpf; - int err; - - ASSERT_RTNL(); - - if (!umem->dev) - return; - - if (umem->zc) { - bpf.command = XDP_SETUP_XSK_UMEM; - bpf.xsk.umem = NULL; - bpf.xsk.queue_id = umem->queue_id; - - err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf); - - if (err) - WARN(1, "failed to disable umem!\n"); - } - - xdp_clear_umem_at_qid(umem->dev, umem->queue_id); - - dev_put(umem->dev); - umem->dev = NULL; - umem->zc = false; -} - static void xdp_umem_unpin_pages(struct xdp_umem *umem) { unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); @@ -195,38 +39,33 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem) } } -static void xdp_umem_release(struct xdp_umem *umem) +static void xdp_umem_addr_unmap(struct xdp_umem *umem) { - rtnl_lock(); - xdp_umem_clear_dev(umem); - rtnl_unlock(); - - ida_simple_remove(&umem_ida, umem->id); + vunmap(umem->addrs); + umem->addrs = NULL; +} - if (umem->fq) { - xskq_destroy(umem->fq); - umem->fq = NULL; - } +static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, + u32 nr_pages) +{ + umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + if (!umem->addrs) + return -ENOMEM; + return 0; +} - if (umem->cq) { - xskq_destroy(umem->cq); - umem->cq = NULL; - } +static void xdp_umem_release(struct xdp_umem *umem) +{ + umem->zc = false; + ida_simple_remove(&umem_ida, umem->id); - xp_destroy(umem->pool); + xdp_umem_addr_unmap(umem); xdp_umem_unpin_pages(umem); xdp_umem_unaccount_pages(umem); kfree(umem); } -static void xdp_umem_release_deferred(struct work_struct *work) -{ - struct xdp_umem *umem = container_of(work, struct xdp_umem, work); - - xdp_umem_release(umem); -} - void xdp_get_umem(struct xdp_umem *umem) { refcount_inc(&umem->users); @@ -237,10 +76,8 @@ void xdp_put_umem(struct xdp_umem *umem) if (!umem) return; - if (refcount_dec_and_test(&umem->users)) { - INIT_WORK(&umem->work, xdp_umem_release_deferred); - schedule_work(&umem->work); - } + if (refcount_dec_and_test(&umem->users)) + xdp_umem_release(umem); } static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) @@ -319,8 +156,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG | - XDP_UMEM_USES_NEED_WAKEUP)) + if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) return -EINVAL; if (!unaligned_chunks && !is_power_of_2(chunk_size)) @@ -355,13 +191,13 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->size = size; umem->headroom = headroom; umem->chunk_size = chunk_size; + umem->chunks = chunks; umem->npgs = (u32)npgs; umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; - INIT_LIST_HEAD(&umem->xsk_tx_list); - spin_lock_init(&umem->xsk_tx_list_lock); + INIT_LIST_HEAD(&umem->xsk_dma_list); refcount_set(&umem->users, 1); err = xdp_umem_account_pages(umem); @@ -372,15 +208,13 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (err) goto out_account; - umem->pool = xp_create(umem->pgs, umem->npgs, chunks, chunk_size, - headroom, size, unaligned_chunks); - if (!umem->pool) { - err = -ENOMEM; - goto out_pin; - } + err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs); + if (err) + goto out_unpin; + return 0; -out_pin: +out_unpin: xdp_umem_unpin_pages(umem); out_account: xdp_umem_unaccount_pages(umem); @@ -412,8 +246,3 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) return umem; } - -bool xdp_umem_validate_queues(struct xdp_umem *umem) -{ - return umem->fq && umem->cq; -} diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index 32067fe98f65..181fdda2f2a8 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -8,14 +8,8 @@ #include <net/xdp_sock_drv.h> -int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, - u16 queue_id, u16 flags); -void xdp_umem_clear_dev(struct xdp_umem *umem); -bool xdp_umem_validate_queues(struct xdp_umem *umem); void xdp_get_umem(struct xdp_umem *umem); void xdp_put_umem(struct xdp_umem *umem); -void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs); -void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs); struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr); #endif /* XDP_UMEM_H_ */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 6c5e09e7440a..cfbec3989a76 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -33,71 +33,105 @@ static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); -bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) +void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { - return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) && - READ_ONCE(xs->umem->fq); -} - -void xsk_set_rx_need_wakeup(struct xdp_umem *umem) -{ - if (umem->need_wakeup & XDP_WAKEUP_RX) + if (pool->cached_need_wakeup & XDP_WAKEUP_RX) return; - umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP; - umem->need_wakeup |= XDP_WAKEUP_RX; + pool->fq->ring->flags |= XDP_RING_NEED_WAKEUP; + pool->cached_need_wakeup |= XDP_WAKEUP_RX; } EXPORT_SYMBOL(xsk_set_rx_need_wakeup); -void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool) { struct xdp_sock *xs; - if (umem->need_wakeup & XDP_WAKEUP_TX) + if (pool->cached_need_wakeup & XDP_WAKEUP_TX) return; rcu_read_lock(); - list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { + list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; } rcu_read_unlock(); - umem->need_wakeup |= XDP_WAKEUP_TX; + pool->cached_need_wakeup |= XDP_WAKEUP_TX; } EXPORT_SYMBOL(xsk_set_tx_need_wakeup); -void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool) { - if (!(umem->need_wakeup & XDP_WAKEUP_RX)) + if (!(pool->cached_need_wakeup & XDP_WAKEUP_RX)) return; - umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP; - umem->need_wakeup &= ~XDP_WAKEUP_RX; + pool->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP; + pool->cached_need_wakeup &= ~XDP_WAKEUP_RX; } EXPORT_SYMBOL(xsk_clear_rx_need_wakeup); -void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool) { struct xdp_sock *xs; - if (!(umem->need_wakeup & XDP_WAKEUP_TX)) + if (!(pool->cached_need_wakeup & XDP_WAKEUP_TX)) return; rcu_read_lock(); - list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { + list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP; } rcu_read_unlock(); - umem->need_wakeup &= ~XDP_WAKEUP_TX; + pool->cached_need_wakeup &= ~XDP_WAKEUP_TX; } EXPORT_SYMBOL(xsk_clear_tx_need_wakeup); -bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool) +{ + return pool->uses_need_wakeup; +} +EXPORT_SYMBOL(xsk_uses_need_wakeup); + +struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, + u16 queue_id) +{ + if (queue_id < dev->real_num_rx_queues) + return dev->_rx[queue_id].pool; + if (queue_id < dev->real_num_tx_queues) + return dev->_tx[queue_id].pool; + + return NULL; +} +EXPORT_SYMBOL(xsk_get_pool_from_qid); + +void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) +{ + if (queue_id < dev->real_num_rx_queues) + dev->_rx[queue_id].pool = NULL; + if (queue_id < dev->real_num_tx_queues) + dev->_tx[queue_id].pool = NULL; +} + +/* The buffer pool is stored both in the _rx struct and the _tx struct as we do + * not know if the device has more tx queues than rx, or the opposite. + * This might also change during run time. + */ +int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, + u16 queue_id) { - return umem->flags & XDP_UMEM_USES_NEED_WAKEUP; + if (queue_id >= max_t(unsigned int, + dev->real_num_rx_queues, + dev->real_num_tx_queues)) + return -EINVAL; + + if (queue_id < dev->real_num_rx_queues) + dev->_rx[queue_id].pool = pool; + if (queue_id < dev->real_num_tx_queues) + dev->_tx[queue_id].pool = pool; + + return 0; } -EXPORT_SYMBOL(xsk_umem_uses_need_wakeup); void xp_release(struct xdp_buff_xsk *xskb) { @@ -155,12 +189,12 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len, struct xdp_buff *xsk_xdp; int err; - if (len > xsk_umem_get_rx_frame_size(xs->umem)) { + if (len > xsk_pool_get_rx_frame_size(xs->pool)) { xs->rx_dropped++; return -ENOSPC; } - xsk_xdp = xsk_buff_alloc(xs->umem); + xsk_xdp = xsk_buff_alloc(xs->pool); if (!xsk_xdp) { xs->rx_dropped++; return -ENOSPC; @@ -208,7 +242,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, static void xsk_flush(struct xdp_sock *xs) { xskq_prod_submit(xs->rx); - __xskq_cons_release(xs->umem->fq); + __xskq_cons_release(xs->pool->fq); sock_def_readable(&xs->sk); } @@ -249,32 +283,32 @@ void __xsk_map_flush(void) } } -void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) +void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries) { - xskq_prod_submit_n(umem->cq, nb_entries); + xskq_prod_submit_n(pool->cq, nb_entries); } -EXPORT_SYMBOL(xsk_umem_complete_tx); +EXPORT_SYMBOL(xsk_tx_completed); -void xsk_umem_consume_tx_done(struct xdp_umem *umem) +void xsk_tx_release(struct xsk_buff_pool *pool) { struct xdp_sock *xs; rcu_read_lock(); - list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { + list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { __xskq_cons_release(xs->tx); xs->sk.sk_write_space(&xs->sk); } rcu_read_unlock(); } -EXPORT_SYMBOL(xsk_umem_consume_tx_done); +EXPORT_SYMBOL(xsk_tx_release); -bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) +bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { struct xdp_sock *xs; rcu_read_lock(); - list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { - if (!xskq_cons_peek_desc(xs->tx, desc, umem)) { + list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { + if (!xskq_cons_peek_desc(xs->tx, desc, pool)) { xs->tx->queue_empty_descs++; continue; } @@ -284,7 +318,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) * if there is space in it. This avoids having to implement * any buffering in the Tx path. */ - if (xskq_prod_reserve_addr(umem->cq, desc->addr)) + if (xskq_prod_reserve_addr(pool->cq, desc->addr)) goto out; xskq_cons_release(xs->tx); @@ -296,7 +330,7 @@ out: rcu_read_unlock(); return false; } -EXPORT_SYMBOL(xsk_umem_consume_tx); +EXPORT_SYMBOL(xsk_tx_peek_desc); static int xsk_wakeup(struct xdp_sock *xs, u8 flags) { @@ -322,7 +356,7 @@ static void xsk_destruct_skb(struct sk_buff *skb) unsigned long flags; spin_lock_irqsave(&xs->tx_completion_lock, flags); - xskq_prod_submit_addr(xs->umem->cq, addr); + xskq_prod_submit_addr(xs->pool->cq, addr); spin_unlock_irqrestore(&xs->tx_completion_lock, flags); sock_wfree(skb); @@ -342,7 +376,7 @@ static int xsk_generic_xmit(struct sock *sk) if (xs->queue_id >= xs->dev->real_num_tx_queues) goto out; - while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) { + while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) { char *buffer; u64 addr; u32 len; @@ -359,14 +393,14 @@ static int xsk_generic_xmit(struct sock *sk) skb_put(skb, len); addr = desc.addr; - buffer = xsk_buff_raw_get_data(xs->umem, addr); + buffer = xsk_buff_raw_get_data(xs->pool, addr); err = skb_store_bits(skb, 0, buffer, len); /* This is the backpressure mechanism for the Tx path. * Reserve space in the completion queue and only proceed * if there is space in it. This avoids having to implement * any buffering in the Tx path. */ - if (unlikely(err) || xskq_prod_reserve(xs->umem->cq)) { + if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) { kfree_skb(skb); goto out; } @@ -446,16 +480,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); - struct xdp_umem *umem; + struct xsk_buff_pool *pool; if (unlikely(!xsk_is_bound(xs))) return mask; - umem = xs->umem; + pool = xs->pool; - if (umem->need_wakeup) { + if (pool->cached_need_wakeup) { if (xs->zc) - xsk_wakeup(xs, umem->need_wakeup); + xsk_wakeup(xs, pool->cached_need_wakeup); else /* Poll needs to drive Tx also in copy mode */ __xsk_sendmsg(sk); @@ -496,7 +530,7 @@ static void xsk_unbind_dev(struct xdp_sock *xs) WRITE_ONCE(xs->state, XSK_UNBOUND); /* Wait for driver to stop using the xdp socket. */ - xdp_del_sk_umem(xs->umem, xs); + xp_del_xsk(xs->pool, xs); xs->dev = NULL; synchronize_net(); dev_put(dev); @@ -574,6 +608,8 @@ static int xsk_release(struct socket *sock) xskq_destroy(xs->rx); xskq_destroy(xs->tx); + xskq_destroy(xs->fq_tmp); + xskq_destroy(xs->cq_tmp); sock_orphan(sk); sock->sk = NULL; @@ -601,6 +637,11 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd) return sock; } +static bool xsk_validate_queues(struct xdp_sock *xs) +{ + return xs->fq_tmp && xs->cq_tmp; +} + static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; @@ -669,29 +710,66 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) sockfd_put(sock); goto out_unlock; } - if (umem_xs->dev != dev || umem_xs->queue_id != qid) { - err = -EINVAL; - sockfd_put(sock); - goto out_unlock; + + if (umem_xs->queue_id != qid || umem_xs->dev != dev) { + /* Share the umem with another socket on another qid + * and/or device. + */ + xs->pool = xp_create_and_assign_umem(xs, + umem_xs->umem); + if (!xs->pool) { + err = -ENOMEM; + sockfd_put(sock); + goto out_unlock; + } + + err = xp_assign_dev_shared(xs->pool, umem_xs->umem, + dev, qid); + if (err) { + xp_destroy(xs->pool); + xs->pool = NULL; + sockfd_put(sock); + goto out_unlock; + } + } else { + /* Share the buffer pool with the other socket. */ + if (xs->fq_tmp || xs->cq_tmp) { + /* Do not allow setting your own fq or cq. */ + err = -EINVAL; + sockfd_put(sock); + goto out_unlock; + } + + xp_get_pool(umem_xs->pool); + xs->pool = umem_xs->pool; } xdp_get_umem(umem_xs->umem); WRITE_ONCE(xs->umem, umem_xs->umem); sockfd_put(sock); - } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { + } else if (!xs->umem || !xsk_validate_queues(xs)) { err = -EINVAL; goto out_unlock; } else { /* This xsk has its own umem. */ - err = xdp_umem_assign_dev(xs->umem, dev, qid, flags); - if (err) + xs->pool = xp_create_and_assign_umem(xs, xs->umem); + if (!xs->pool) { + err = -ENOMEM; goto out_unlock; + } + + err = xp_assign_dev(xs->pool, dev, qid, flags); + if (err) { + xp_destroy(xs->pool); + xs->pool = NULL; + goto out_unlock; + } } xs->dev = dev; xs->zc = xs->umem->zc; xs->queue_id = qid; - xdp_add_sk_umem(xs->umem, xs); + xp_add_xsk(xs->pool, xs); out_unlock: if (err) { @@ -797,16 +875,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, mutex_unlock(&xs->mutex); return -EBUSY; } - if (!xs->umem) { - mutex_unlock(&xs->mutex); - return -EINVAL; - } - q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq : - &xs->umem->cq; + q = (optname == XDP_UMEM_FILL_RING) ? &xs->fq_tmp : + &xs->cq_tmp; err = xsk_init_queue(entries, q, true); - if (optname == XDP_UMEM_FILL_RING) - xp_set_fq(xs->umem->pool, *q); mutex_unlock(&xs->mutex); return err; } @@ -873,7 +945,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, if (extra_stats) { stats.rx_ring_full = xs->rx_queue_full; stats.rx_fill_ring_empty_descs = - xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0; + xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0; stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx); } else { stats.rx_dropped += xs->rx_queue_full; @@ -975,7 +1047,6 @@ static int xsk_mmap(struct file *file, struct socket *sock, unsigned long size = vma->vm_end - vma->vm_start; struct xdp_sock *xs = xdp_sk(sock->sk); struct xsk_queue *q = NULL; - struct xdp_umem *umem; unsigned long pfn; struct page *qpg; @@ -987,16 +1058,12 @@ static int xsk_mmap(struct file *file, struct socket *sock, } else if (offset == XDP_PGOFF_TX_RING) { q = READ_ONCE(xs->tx); } else { - umem = READ_ONCE(xs->umem); - if (!umem) - return -EINVAL; - /* Matches the smp_wmb() in XDP_UMEM_REG */ smp_rmb(); if (offset == XDP_UMEM_PGOFF_FILL_RING) - q = READ_ONCE(umem->fq); + q = READ_ONCE(xs->fq_tmp); else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) - q = READ_ONCE(umem->cq); + q = READ_ONCE(xs->cq_tmp); } if (!q) @@ -1034,8 +1101,8 @@ static int xsk_notifier(struct notifier_block *this, xsk_unbind_dev(xs); - /* Clear device references in umem. */ - xdp_umem_clear_dev(xs->umem); + /* Clear device references. */ + xp_clear_dev(xs->pool); } mutex_unlock(&xs->mutex); } @@ -1079,7 +1146,8 @@ static void xsk_destruct(struct sock *sk) if (!sock_flag(sk, SOCK_DEAD)) return; - xdp_put_umem(xs->umem); + if (!xp_put_pool(xs->pool)) + xdp_put_umem(xs->umem); sk_refcnt_debug_dec(sk); } @@ -1087,8 +1155,8 @@ static void xsk_destruct(struct sock *sk) static int xsk_create(struct net *net, struct socket *sock, int protocol, int kern) { - struct sock *sk; struct xdp_sock *xs; + struct sock *sk; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index 455ddd480f3d..b9e896cee5bb 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -11,13 +11,6 @@ #define XSK_NEXT_PG_CONTIG_SHIFT 0 #define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) -/* Flags for the umem flags field. - * - * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public - * flags. See inlude/uapi/include/linux/if_xdp.h. - */ -#define XDP_UMEM_USES_NEED_WAKEUP BIT(1) - struct xdp_ring_offset_v1 { __u64 producer; __u64 consumer; @@ -46,10 +39,12 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk) return (struct xdp_sock *)sk; } -bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, struct xdp_sock **map_entry); int xsk_map_inc(struct xsk_map *map); void xsk_map_put(struct xsk_map *map); +void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id); +int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, + u16 queue_id); #endif /* XSK_H_ */ diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index a2044c245215..8a3bf4e1318e 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -2,21 +2,34 @@ #include <net/xsk_buff_pool.h> #include <net/xdp_sock.h> +#include <net/xdp_sock_drv.h> #include "xsk_queue.h" +#include "xdp_umem.h" +#include "xsk.h" -static void xp_addr_unmap(struct xsk_buff_pool *pool) +void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs) { - vunmap(pool->addrs); + unsigned long flags; + + if (!xs->tx) + return; + + spin_lock_irqsave(&pool->xsk_tx_list_lock, flags); + list_add_rcu(&xs->tx_list, &pool->xsk_tx_list); + spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags); } -static int xp_addr_map(struct xsk_buff_pool *pool, - struct page **pages, u32 nr_pages) +void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs) { - pool->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); - if (!pool->addrs) - return -ENOMEM; - return 0; + unsigned long flags; + + if (!xs->tx) + return; + + spin_lock_irqsave(&pool->xsk_tx_list_lock, flags); + list_del_rcu(&xs->tx_list); + spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags); } void xp_destroy(struct xsk_buff_pool *pool) @@ -24,59 +37,61 @@ void xp_destroy(struct xsk_buff_pool *pool) if (!pool) return; - xp_addr_unmap(pool); kvfree(pool->heads); kvfree(pool); } -struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks, - u32 chunk_size, u32 headroom, u64 size, - bool unaligned) +struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, + struct xdp_umem *umem) { struct xsk_buff_pool *pool; struct xdp_buff_xsk *xskb; - int err; u32 i; - pool = kvzalloc(struct_size(pool, free_heads, chunks), GFP_KERNEL); + pool = kvzalloc(struct_size(pool, free_heads, umem->chunks), + GFP_KERNEL); if (!pool) goto out; - pool->heads = kvcalloc(chunks, sizeof(*pool->heads), GFP_KERNEL); + pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL); if (!pool->heads) goto out; - pool->chunk_mask = ~((u64)chunk_size - 1); - pool->addrs_cnt = size; - pool->heads_cnt = chunks; - pool->free_heads_cnt = chunks; - pool->headroom = headroom; - pool->chunk_size = chunk_size; - pool->unaligned = unaligned; - pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; + pool->chunk_mask = ~((u64)umem->chunk_size - 1); + pool->addrs_cnt = umem->size; + pool->heads_cnt = umem->chunks; + pool->free_heads_cnt = umem->chunks; + pool->headroom = umem->headroom; + pool->chunk_size = umem->chunk_size; + pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; + pool->frame_len = umem->chunk_size - umem->headroom - + XDP_PACKET_HEADROOM; + pool->umem = umem; + pool->addrs = umem->addrs; INIT_LIST_HEAD(&pool->free_list); + INIT_LIST_HEAD(&pool->xsk_tx_list); + spin_lock_init(&pool->xsk_tx_list_lock); + refcount_set(&pool->users, 1); + + pool->fq = xs->fq_tmp; + pool->cq = xs->cq_tmp; + xs->fq_tmp = NULL; + xs->cq_tmp = NULL; for (i = 0; i < pool->free_heads_cnt; i++) { xskb = &pool->heads[i]; xskb->pool = pool; - xskb->xdp.frame_sz = chunk_size - headroom; + xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; pool->free_heads[i] = xskb; } - err = xp_addr_map(pool, pages, nr_pages); - if (!err) - return pool; + return pool; out: xp_destroy(pool); return NULL; } -void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq) -{ - pool->fq = fq; -} - void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { u32 i; @@ -86,70 +101,323 @@ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) } EXPORT_SYMBOL(xp_set_rxq_info); -void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) +static void xp_disable_drv_zc(struct xsk_buff_pool *pool) { - dma_addr_t *dma; - u32 i; + struct netdev_bpf bpf; + int err; - if (pool->dma_pages_cnt == 0) + ASSERT_RTNL(); + + if (pool->umem->zc) { + bpf.command = XDP_SETUP_XSK_POOL; + bpf.xsk.pool = NULL; + bpf.xsk.queue_id = pool->queue_id; + + err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf); + + if (err) + WARN(1, "Failed to disable zero-copy!\n"); + } +} + +static int __xp_assign_dev(struct xsk_buff_pool *pool, + struct net_device *netdev, u16 queue_id, u16 flags) +{ + bool force_zc, force_copy; + struct netdev_bpf bpf; + int err = 0; + + ASSERT_RTNL(); + + force_zc = flags & XDP_ZEROCOPY; + force_copy = flags & XDP_COPY; + + if (force_zc && force_copy) + return -EINVAL; + + if (xsk_get_pool_from_qid(netdev, queue_id)) + return -EBUSY; + + pool->netdev = netdev; + pool->queue_id = queue_id; + err = xsk_reg_pool_at_qid(netdev, pool, queue_id); + if (err) + return err; + + if (flags & XDP_USE_NEED_WAKEUP) { + pool->uses_need_wakeup = true; + /* Tx needs to be explicitly woken up the first time. + * Also for supporting drivers that do not implement this + * feature. They will always have to call sendto(). + */ + pool->cached_need_wakeup = XDP_WAKEUP_TX; + } + + dev_hold(netdev); + + if (force_copy) + /* For copy-mode, we are done. */ + return 0; + + if (!netdev->netdev_ops->ndo_bpf || + !netdev->netdev_ops->ndo_xsk_wakeup) { + err = -EOPNOTSUPP; + goto err_unreg_pool; + } + + bpf.command = XDP_SETUP_XSK_POOL; + bpf.xsk.pool = pool; + bpf.xsk.queue_id = queue_id; + + err = netdev->netdev_ops->ndo_bpf(netdev, &bpf); + if (err) + goto err_unreg_pool; + + if (!pool->dma_pages) { + WARN(1, "Driver did not DMA map zero-copy buffers"); + goto err_unreg_xsk; + } + pool->umem->zc = true; + return 0; + +err_unreg_xsk: + xp_disable_drv_zc(pool); +err_unreg_pool: + if (!force_zc) + err = 0; /* fallback to copy mode */ + if (err) + xsk_clear_pool_at_qid(netdev, queue_id); + return err; +} + +int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, + u16 queue_id, u16 flags) +{ + return __xp_assign_dev(pool, dev, queue_id, flags); +} + +int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, + struct net_device *dev, u16 queue_id) +{ + u16 flags; + + /* One fill and completion ring required for each queue id. */ + if (!pool->fq || !pool->cq) + return -EINVAL; + + flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY; + if (pool->uses_need_wakeup) + flags |= XDP_USE_NEED_WAKEUP; + + return __xp_assign_dev(pool, dev, queue_id, flags); +} + +void xp_clear_dev(struct xsk_buff_pool *pool) +{ + if (!pool->netdev) return; - for (i = 0; i < pool->dma_pages_cnt; i++) { - dma = &pool->dma_pages[i]; + xp_disable_drv_zc(pool); + xsk_clear_pool_at_qid(pool->netdev, pool->queue_id); + dev_put(pool->netdev); + pool->netdev = NULL; +} + +static void xp_release_deferred(struct work_struct *work) +{ + struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool, + work); + + rtnl_lock(); + xp_clear_dev(pool); + rtnl_unlock(); + + if (pool->fq) { + xskq_destroy(pool->fq); + pool->fq = NULL; + } + + if (pool->cq) { + xskq_destroy(pool->cq); + pool->cq = NULL; + } + + xdp_put_umem(pool->umem); + xp_destroy(pool); +} + +void xp_get_pool(struct xsk_buff_pool *pool) +{ + refcount_inc(&pool->users); +} + +bool xp_put_pool(struct xsk_buff_pool *pool) +{ + if (!pool) + return false; + + if (refcount_dec_and_test(&pool->users)) { + INIT_WORK(&pool->work, xp_release_deferred); + schedule_work(&pool->work); + return true; + } + + return false; +} + +static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool) +{ + struct xsk_dma_map *dma_map; + + list_for_each_entry(dma_map, &pool->umem->xsk_dma_list, list) { + if (dma_map->netdev == pool->netdev) + return dma_map; + } + + return NULL; +} + +static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_device *netdev, + u32 nr_pages, struct xdp_umem *umem) +{ + struct xsk_dma_map *dma_map; + + dma_map = kzalloc(sizeof(*dma_map), GFP_KERNEL); + if (!dma_map) + return NULL; + + dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL); + if (!dma_map->dma_pages) { + kfree(dma_map); + return NULL; + } + + dma_map->netdev = netdev; + dma_map->dev = dev; + dma_map->dma_need_sync = false; + dma_map->dma_pages_cnt = nr_pages; + refcount_set(&dma_map->users, 1); + list_add(&dma_map->list, &umem->xsk_dma_list); + return dma_map; +} + +static void xp_destroy_dma_map(struct xsk_dma_map *dma_map) +{ + list_del(&dma_map->list); + kvfree(dma_map->dma_pages); + kfree(dma_map); +} + +static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs) +{ + dma_addr_t *dma; + u32 i; + + for (i = 0; i < dma_map->dma_pages_cnt; i++) { + dma = &dma_map->dma_pages[i]; if (*dma) { - dma_unmap_page_attrs(pool->dev, *dma, PAGE_SIZE, + dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE, DMA_BIDIRECTIONAL, attrs); *dma = 0; } } + xp_destroy_dma_map(dma_map); +} + +void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) +{ + struct xsk_dma_map *dma_map; + + if (pool->dma_pages_cnt == 0) + return; + + dma_map = xp_find_dma_map(pool); + if (!dma_map) { + WARN(1, "Could not find dma_map for device"); + return; + } + + if (!refcount_dec_and_test(&dma_map->users)) + return; + + __xp_dma_unmap(dma_map, attrs); kvfree(pool->dma_pages); pool->dma_pages_cnt = 0; pool->dev = NULL; } EXPORT_SYMBOL(xp_dma_unmap); -static void xp_check_dma_contiguity(struct xsk_buff_pool *pool) +static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map) { u32 i; - for (i = 0; i < pool->dma_pages_cnt - 1; i++) { - if (pool->dma_pages[i] + PAGE_SIZE == pool->dma_pages[i + 1]) - pool->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK; + for (i = 0; i < dma_map->dma_pages_cnt - 1; i++) { + if (dma_map->dma_pages[i] + PAGE_SIZE == dma_map->dma_pages[i + 1]) + dma_map->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK; else - pool->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK; + dma_map->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK; } } +static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map) +{ + pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL); + if (!pool->dma_pages) + return -ENOMEM; + + pool->dev = dma_map->dev; + pool->dma_pages_cnt = dma_map->dma_pages_cnt; + pool->dma_need_sync = dma_map->dma_need_sync; + memcpy(pool->dma_pages, dma_map->dma_pages, + pool->dma_pages_cnt * sizeof(*pool->dma_pages)); + + return 0; +} + int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages) { + struct xsk_dma_map *dma_map; dma_addr_t dma; + int err; u32 i; - pool->dma_pages = kvcalloc(nr_pages, sizeof(*pool->dma_pages), - GFP_KERNEL); - if (!pool->dma_pages) - return -ENOMEM; + dma_map = xp_find_dma_map(pool); + if (dma_map) { + err = xp_init_dma_info(pool, dma_map); + if (err) + return err; + + refcount_inc(&dma_map->users); + return 0; + } - pool->dev = dev; - pool->dma_pages_cnt = nr_pages; - pool->dma_need_sync = false; + dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem); + if (!dma_map) + return -ENOMEM; - for (i = 0; i < pool->dma_pages_cnt; i++) { + for (i = 0; i < dma_map->dma_pages_cnt; i++) { dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, attrs); if (dma_mapping_error(dev, dma)) { - xp_dma_unmap(pool, attrs); + __xp_dma_unmap(dma_map, attrs); return -ENOMEM; } if (dma_need_sync(dev, dma)) - pool->dma_need_sync = true; - pool->dma_pages[i] = dma; + dma_map->dma_need_sync = true; + dma_map->dma_pages[i] = dma; } if (pool->unaligned) - xp_check_dma_contiguity(pool); + xp_check_dma_contiguity(dma_map); + + err = xp_init_dma_info(pool, dma_map); + if (err) { + __xp_dma_unmap(dma_map, attrs); + return err; + } + return 0; } EXPORT_SYMBOL(xp_dma_map); diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 21e9c2d123ee..c014217f5fa7 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -46,6 +46,7 @@ static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs, static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) { + struct xsk_buff_pool *pool = xs->pool; struct xdp_umem *umem = xs->umem; struct xdp_diag_umem du = {}; int err; @@ -58,21 +59,20 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) du.num_pages = umem->npgs; du.chunk_size = umem->chunk_size; du.headroom = umem->headroom; - du.ifindex = umem->dev ? umem->dev->ifindex : 0; - du.queue_id = umem->queue_id; + du.ifindex = (pool && pool->netdev) ? pool->netdev->ifindex : 0; + du.queue_id = pool ? pool->queue_id : 0; du.flags = 0; if (umem->zc) du.flags |= XDP_DU_F_ZEROCOPY; du.refs = refcount_read(&umem->users); err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); - - if (!err && umem->fq) - err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb); - if (!err && umem->cq) { - err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING, - nlskb); - } + if (!err && pool && pool->fq) + err = xsk_diag_put_ring(pool->fq, + XDP_DIAG_UMEM_FILL_RING, nlskb); + if (!err && pool && pool->cq) + err = xsk_diag_put_ring(pool->cq, + XDP_DIAG_UMEM_COMPLETION_RING, nlskb); return err; } @@ -83,7 +83,7 @@ static int xsk_diag_put_stats(const struct xdp_sock *xs, struct sk_buff *nlskb) du.n_rx_dropped = xs->rx_dropped; du.n_rx_invalid = xskq_nb_invalid_descs(xs->rx); du.n_rx_full = xs->rx_queue_full; - du.n_fill_ring_empty = xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0; + du.n_fill_ring_empty = xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0; du.n_tx_invalid = xskq_nb_invalid_descs(xs->tx); du.n_tx_ring_empty = xskq_nb_queue_empty_descs(xs->tx); return nla_put(nlskb, XDP_DIAG_STATS, sizeof(du), &du); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index bf42cfd74b89..cdb9cf3cd136 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -15,6 +15,10 @@ struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; + /* Hinder the adjacent cache prefetcher to prefetch the consumer + * pointer if the producer pointer is touched and vice versa. + */ + u32 pad ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; u32 flags; }; @@ -96,7 +100,7 @@ struct xsk_queue { * seen and read by the consumer. * * The consumer peeks into the ring to see if the producer has written - * any new entries. If so, the producer can then read these entries + * any new entries. If so, the consumer can then read these entries * and when it is done reading them release them back to the producer * so that the producer can use these slots to fill in new entries. * @@ -166,9 +170,9 @@ static inline bool xp_validate_desc(struct xsk_buff_pool *pool, static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, - struct xdp_umem *umem) + struct xsk_buff_pool *pool) { - if (!xp_validate_desc(umem->pool, d)) { + if (!xp_validate_desc(pool, d)) { q->invalid_descs++; return false; } @@ -177,14 +181,14 @@ static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, static inline bool xskq_cons_read_desc(struct xsk_queue *q, struct xdp_desc *desc, - struct xdp_umem *umem) + struct xsk_buff_pool *pool) { while (q->cached_cons != q->cached_prod) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; u32 idx = q->cached_cons & q->ring_mask; *desc = ring->desc[idx]; - if (xskq_cons_is_valid_desc(q, desc, umem)) + if (xskq_cons_is_valid_desc(q, desc, pool)) return true; q->cached_cons++; @@ -236,11 +240,11 @@ static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xskq_cons_peek_desc(struct xsk_queue *q, struct xdp_desc *desc, - struct xdp_umem *umem) + struct xsk_buff_pool *pool) { if (q->cached_prod == q->cached_cons) xskq_cons_get_entries(q); - return xskq_cons_read_desc(q, desc, umem); + return xskq_cons_read_desc(q, desc, pool); } static inline void xskq_cons_release(struct xsk_queue *q) diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 8367adbbe9df..49da2b8ace8b 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -132,7 +132,7 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } -static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2; struct bpf_insn *insn = insn_buf; @@ -185,11 +185,6 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, xs = (struct xdp_sock *)sock->sk; - if (!xsk_is_setup_for_bpf_map(xs)) { - sockfd_put(sock); - return -EOPNOTSUPP; - } - map_entry = &m->xsk_map[i]; node = xsk_map_node_alloc(m, map_entry); if (IS_ERR(node)) { @@ -254,8 +249,16 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, spin_unlock_bh(&map->lock); } +static bool xsk_map_meta_equal(const struct bpf_map *meta0, + const struct bpf_map *meta1) +{ + return meta0->max_entries == meta1->max_entries && + bpf_map_meta_equal(meta0, meta1); +} + static int xsk_map_btf_id; const struct bpf_map_ops xsk_map_ops = { + .map_meta_equal = xsk_map_meta_equal, .map_alloc = xsk_map_alloc, .map_free = xsk_map_free, .map_get_next_key = xsk_map_get_next_key, |