summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/net/xdp_sock.h 4
-rw-r--r-- include/net/xsk_buff_pool.h 5
-rw-r--r-- kernel/bpf/hashtab.c 1
-rw-r--r-- kernel/bpf/syscall.c 1
-rw-r--r-- kernel/bpf/task_iter.c 18
-rw-r--r-- net/xdp/xsk.c 16
-rw-r--r-- net/xdp/xsk_buff_pool.c 3
-rw-r--r-- net/xdp/xsk_queue.h 5
-rw-r--r-- tools/testing/selftests/bpf/Makefile 3
-rw-r--r-- tools/testing/selftests/bpf/test_maps.c 48
-rw-r--r-- tools/testing/selftests/bpf/xdpxceiver.c 4
11 files changed, 81 insertions, 27 deletions
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 4f4e93bf814c..cc17bc957548 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -58,10 +58,6 @@ struct xdp_sock {
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head tx_list;
- /* Mutual exclusion of NAPI TX thread and sendmsg error paths
- * in the SKB destructor callback.
- */
- spinlock_t tx_completion_lock;
/* Protects generic receive. */
spinlock_t rx_lock;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 01755b838c74..eaa8386dbc63 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -73,6 +73,11 @@ struct xsk_buff_pool {
bool dma_need_sync;
bool unaligned;
void *addrs;
+ /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
+ * the NAPI TX thread vs. the sendmsg error paths in the SKB destructor callback, and
+ * sockets sharing a single cq because they use the same netdev and queue id.
+ */
+ spinlock_t cq_lock;
struct xdp_buff_xsk *free_heads[];
};
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 7e848200cd26..c1ac7f964bc9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab)
lockdep_set_class(&htab->buckets[i].lock,
&htab->lockdep_key);
}
+ cond_resched();
}
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4caf06fe4152..c3bb03c8371f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -17,7 +17,6 @@
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
-#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index e73c07593024..3efe38191d1c 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -37,7 +37,7 @@ retry:
if (!task) {
++*tid;
goto retry;
- } else if (skip_if_dup_files && task->tgid != task->pid &&
+ } else if (skip_if_dup_files && !thread_group_leader(task) &&
task->files == task->group_leader->files) {
put_task_struct(task);
task = NULL;
@@ -151,14 +151,14 @@ again:
curr_task = info->task;
curr_fd = info->fd;
} else {
- curr_task = task_seq_get_next(ns, &curr_tid, true);
- if (!curr_task) {
- info->task = NULL;
- return NULL;
- }
-
- /* set info->task and info->tid */
- info->task = curr_task;
+ curr_task = task_seq_get_next(ns, &curr_tid, true);
+ if (!curr_task) {
+ info->task = NULL;
+ info->tid = curr_tid;
+ return NULL;
+ }
+
+ /* set info->task and info->tid */
if (curr_tid == info->tid) {
curr_fd = info->fd;
} else {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ac4a317038f1..8037b04a9edd 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -423,9 +423,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags;
- spin_lock_irqsave(&xs->tx_completion_lock, flags);
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
xskq_prod_submit_addr(xs->pool->cq, addr);
- spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
sock_wfree(skb);
}
@@ -437,6 +437,7 @@ static int xsk_generic_xmit(struct sock *sk)
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
+ unsigned long flags;
int err = 0;
mutex_lock(&xs->mutex);
@@ -468,10 +469,13 @@ static int xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
kfree_skb(skb);
goto out;
}
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
@@ -483,6 +487,9 @@ static int xsk_generic_xmit(struct sock *sk)
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
skb->destructor = sock_wfree;
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel(xs->pool->cq);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
err = -EAGAIN;
@@ -878,6 +885,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
}
+ /* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
+ xs->fq_tmp = NULL;
+ xs->cq_tmp = NULL;
+
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
@@ -1299,7 +1310,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs->state = XSK_READY;
mutex_init(&xs->mutex);
spin_lock_init(&xs->rx_lock);
- spin_lock_init(&xs->tx_completion_lock);
INIT_LIST_HEAD(&xs->map_list);
spin_lock_init(&xs->map_list_lock);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 67a4494d63b6..20598eea658c 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -71,12 +71,11 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
+ spin_lock_init(&pool->cq_lock);
refcount_set(&pool->users, 1);
pool->fq = xs->fq_tmp;
pool->cq = xs->cq_tmp;
- xs->fq_tmp = NULL;
- xs->cq_tmp = NULL;
for (i = 0; i < pool->free_heads_cnt; i++) {
xskb = &pool->heads[i];
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 4a9663aa7afe..2823b7c3302d 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}
+static inline void xskq_prod_cancel(struct xsk_queue *q)
+{
+ q->cached_prod--;
+}
+
static inline int xskq_prod_reserve(struct xsk_queue *q)
{
if (xskq_prod_is_full(q))
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8c33e999319a..c51df6b91bef 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
# to build individual tests.
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0ad3e6305ff0..51adc42b2b40 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1312,22 +1312,58 @@ static void test_map_stress(void)
#define DO_UPDATE 1
#define DO_DELETE 0
+#define MAP_RETRIES 20
+
+static int map_update_retriable(int map_fd, const void *key, const void *value,
+ int flags, int attempts)
+{
+ while (bpf_map_update_elem(map_fd, key, value, flags)) {
+ if (!attempts || (errno != EAGAIN && errno != EBUSY))
+ return -errno;
+
+ usleep(1);
+ attempts--;
+ }
+
+ return 0;
+}
+
+static int map_delete_retriable(int map_fd, const void *key, int attempts)
+{
+ while (bpf_map_delete_elem(map_fd, key)) {
+ if (!attempts || (errno != EAGAIN && errno != EBUSY))
+ return -errno;
+
+ usleep(1);
+ attempts--;
+ }
+
+ return 0;
+}
+
static void test_update_delete(unsigned int fn, void *data)
{
int do_update = ((int *)data)[1];
int fd = ((int *)data)[0];
- int i, key, value;
+ int i, key, value, err;
for (i = fn; i < MAP_SIZE; i += TASKS) {
key = value = i;
if (do_update) {
- assert(bpf_map_update_elem(fd, &key, &value,
- BPF_NOEXIST) == 0);
- assert(bpf_map_update_elem(fd, &key, &value,
- BPF_EXIST) == 0);
+ err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
+ err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
} else {
- assert(bpf_map_delete_elem(fd, &key) == 0);
+ err = map_delete_retriable(fd, &key, MAP_RETRIES);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
}
}
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 014dedaa4dd2..1e722ee76b1f 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -715,7 +715,7 @@ static void worker_pkt_dump(void)
int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
if (payload == EOT) {
- ksft_print_msg("End-of-tranmission frame received\n");
+ ksft_print_msg("End-of-transmission frame received\n");
fprintf(stdout, "---------------------------------------\n");
break;
}
@@ -747,7 +747,7 @@ static void worker_pkt_validate(void)
}
if (payloadseqnum == EOT) {
- ksft_print_msg("End-of-tranmission frame received: PASS\n");
+ ksft_print_msg("End-of-transmission frame received: PASS\n");
sigvar = 1;
break;
}