summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Fastabend <john.fastabend@gmail.com>2018-03-28 22:49:25 +0300
committerDaniel Borkmann <daniel@iogearbox.net>2018-03-30 01:09:43 +0300
commitfa246693a111fab32bd51d20f07a347e42773ee9 (patch)
treecc0bae757a14383517dff29029cb1e0a1380c7e9
parent2596f64cb2ff5767166bee4e812734747c7a0474 (diff)
downloadlinux-fa246693a111fab32bd51d20f07a347e42773ee9.tar.xz
bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT:
Add support for the BPF_F_INGRESS flag in skb redirect helper. To do this convert skb into a scatterlist and push into ingress queue. This is the same logic that is used in the sk_msg redirect helper so it should feel familiar. Signed-off-by: John Fastabend <john.fastabend@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--include/linux/filter.h1
-rw-r--r--kernel/bpf/sockmap.c94
-rw-r--r--net/core/filter.c2
3 files changed, 78 insertions, 19 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 961cc5d53956..897ff3d95968 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -521,6 +521,7 @@ struct sk_msg_buff {
__u32 key;
__u32 flags;
struct bpf_map *map;
+ struct sk_buff *skb;
struct list_head list;
};
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 402e15466e9f..9192fdbcbccc 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -785,7 +785,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
i++;
if (i == MAX_SKB_FRAGS)
i = 0;
- put_page(page);
+ if (!md->skb)
+ put_page(page);
}
if (copied == len)
break;
@@ -794,6 +795,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (!sg->length && md->sg_start == md->sg_end) {
list_del(&md->list);
+ if (md->skb)
+ consume_skb(md->skb);
kfree(md);
}
}
@@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
__SK_DROP;
}
+static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
+{
+ struct sock *sk = psock->sock;
+ int copied = 0, num_sg;
+ struct sk_msg_buff *r;
+
+ r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
+ if (unlikely(!r))
+ return -EAGAIN;
+
+ if (!sk_rmem_schedule(sk, skb, skb->len)) {
+ kfree(r);
+ return -EAGAIN;
+ }
+
+ sg_init_table(r->sg_data, MAX_SKB_FRAGS);
+ num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
+ if (unlikely(num_sg < 0)) {
+ kfree(r);
+ return num_sg;
+ }
+ sk_mem_charge(sk, skb->len);
+ copied = skb->len;
+ r->sg_start = 0;
+ r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
+ r->skb = skb;
+ list_add_tail(&r->list, &psock->ingress);
+ sk->sk_data_ready(sk);
+ return copied;
+}
+
static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
{
+ struct smap_psock *peer;
struct sock *sk;
+ __u32 in;
int rc;
rc = smap_verdict_func(psock, skb);
switch (rc) {
case __SK_REDIRECT:
sk = do_sk_redirect_map(skb);
- if (likely(sk)) {
- struct smap_psock *peer = smap_psock_sk(sk);
-
- if (likely(peer &&
- test_bit(SMAP_TX_RUNNING, &peer->state) &&
- !sock_flag(sk, SOCK_DEAD) &&
- sock_writeable(sk))) {
- skb_set_owner_w(skb, sk);
- skb_queue_tail(&peer->rxqueue, skb);
- schedule_work(&peer->tx_work);
- break;
- }
+ if (!sk) {
+ kfree_skb(skb);
+ break;
+ }
+
+ peer = smap_psock_sk(sk);
+ in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
+
+ if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
+ !test_bit(SMAP_TX_RUNNING, &peer->state))) {
+ kfree_skb(skb);
+ break;
+ }
+
+ if (!in && sock_writeable(sk)) {
+ skb_set_owner_w(skb, sk);
+ skb_queue_tail(&peer->rxqueue, skb);
+ schedule_work(&peer->tx_work);
+ break;
+ } else if (in &&
+ atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
+ skb_queue_tail(&peer->rxqueue, skb);
+ schedule_work(&peer->tx_work);
+ break;
}
/* Fall through and free skb otherwise */
case __SK_DROP:
@@ -1127,15 +1175,23 @@ static void smap_tx_work(struct work_struct *w)
}
while ((skb = skb_dequeue(&psock->rxqueue))) {
+ __u32 flags;
+
rem = skb->len;
off = 0;
start:
+ flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
do {
- if (likely(psock->sock->sk_socket))
- n = skb_send_sock_locked(psock->sock,
- skb, off, rem);
- else
+ if (likely(psock->sock->sk_socket)) {
+ if (flags)
+ n = smap_do_ingress(psock, skb);
+ else
+ n = skb_send_sock_locked(psock->sock,
+ skb, off, rem);
+ } else {
n = -EINVAL;
+ }
+
if (n <= 0) {
if (n == -EAGAIN) {
/* Retry when space is available */
@@ -1153,7 +1209,9 @@ start:
rem -= n;
off += n;
} while (rem);
- kfree_skb(skb);
+
+ if (!flags)
+ kfree_skb(skb);
}
out:
release_sock(psock->sock);
diff --git a/net/core/filter.c b/net/core/filter.c
index a5a995e5b380..e989bf313195 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1855,7 +1855,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
/* If user passes invalid input drop the packet. */
- if (unlikely(flags))
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
tcb->bpf.key = key;