summary | refs | log | tree | commit | diff
path: root/net/core
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2020-06-02 01:53:08 +0300
committer David S. Miller <davem@davemloft.net> 2020-06-02 01:53:08 +0300
commit 9a25c1df24a6fea9dc79eec950453c4e00f707fd (patch)
tree 1188078f9838a3b6a60a3923ed31df142ffc8ed6 /net/core
parent efd7ed0f5f2d07ccbb1853c5d46656cdfa1371fb (diff)
parent cf51abcded837ef209faa03a62b2ea44e45995e8 (diff)
download linux-9a25c1df24a6fea9dc79eec950453c4e00f707fd.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-06-01

The following pull-request contains BPF updates for your *net-next* tree.

We've added 55 non-merge commits during the last 1 day(s) which contain
a total of 91 files changed, 4986 insertions(+), 463 deletions(-).

The main changes are:

1) Add rx_queue_mapping to bpf_sock from Amritha.

2) Add BPF ring buffer, from Andrii.

3) Attach and run programs through devmap, from David.

4) Allow SO_BINDTODEVICE opt in bpf_setsockopt, from Ferenc.

5) link based flow_dissector, from Jakub.

6) Use tracing helpers for lsm programs, from Jiri.

7) Several sk_msg fixes and extensions, from John.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 18
-rw-r--r-- net/core/filter.c 94
-rw-r--r-- net/core/flow_dissector.c 124
-rw-r--r-- net/core/skmsg.c 98
-rw-r--r-- net/core/sock.c 10
5 files changed, 211 insertions, 133 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index ae37586f6ee8..10684833f864 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5420,6 +5420,18 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
struct bpf_prog *new = xdp->prog;
int ret = 0;
+ if (new) {
+ u32 i;
+
+ /* generic XDP does not work with DEVMAPs that can
+ * have a bpf_prog installed on an entry
+ */
+ for (i = 0; i < new->aux->used_map_cnt; i++) {
+ if (dev_map_can_have_prog(new->aux->used_maps[i]))
+ return -EINVAL;
+ }
+ }
+
switch (xdp->command) {
case XDP_SETUP_PROG:
rcu_assign_pointer(dev->xdp_prog, new);
@@ -8835,6 +8847,12 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
return -EINVAL;
}
+ if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+ NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
/* prog->aux->id may be 0 for orphaned device-bound progs */
if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);
diff --git a/net/core/filter.c b/net/core/filter.c
index bd2853d23b50..ae82bcb03124 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4248,6 +4248,9 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
{
+ char devname[IFNAMSIZ];
+ struct net *net;
+ int ifindex;
int ret = 0;
int val;
@@ -4257,7 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sock_owned_by_me(sk);
if (level == SOL_SOCKET) {
- if (optlen != sizeof(int))
+ if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
val = *((int *)optval);
@@ -4298,6 +4301,29 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sk_dst_reset(sk);
}
break;
+ case SO_BINDTODEVICE:
+ ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ optlen = min_t(long, optlen, IFNAMSIZ - 1);
+ strncpy(devname, optval, optlen);
+ devname[optlen] = 0;
+
+ ifindex = 0;
+ if (devname[0] != '\0') {
+ struct net_device *dev;
+
+ ret = -ENODEV;
+
+ net = sock_net(sk);
+ dev = dev_get_by_name(net, devname);
+ if (!dev)
+ break;
+ ifindex = dev->ifindex;
+ dev_put(dev);
+ }
+ ret = sock_bindtoindex(sk, ifindex, false);
+#endif
+ break;
default:
ret = -EINVAL;
}
@@ -6443,6 +6469,26 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_push_data_proto;
case BPF_FUNC_msg_pop_data:
return &bpf_msg_pop_data_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_event_output_data_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_get_current_pid_tgid:
+ return &bpf_get_current_pid_tgid_proto;
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &bpf_sk_storage_delete_proto;
+#ifdef CONFIG_CGROUPS
+ case BPF_FUNC_get_current_cgroup_id:
+ return &bpf_get_current_cgroup_id_proto;
+ case BPF_FUNC_get_current_ancestor_cgroup_id:
+ return &bpf_get_current_ancestor_cgroup_id_proto;
+#endif
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_curr_proto;
+#endif
default:
return bpf_base_func_proto(func_id);
}
@@ -6829,6 +6875,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case offsetof(struct bpf_sock, protocol):
case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
+ case offsetof(struct bpf_sock, rx_queue_mapping):
case bpf_ctx_range(struct bpf_sock, src_ip4):
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
case bpf_ctx_range(struct bpf_sock, dst_ip4):
@@ -6994,6 +7041,13 @@ static bool xdp_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
+ if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
+ switch (off) {
+ case offsetof(struct xdp_md, egress_ifindex):
+ return false;
+ }
+ }
+
if (type == BPF_WRITE) {
if (bpf_prog_is_dev_bound(prog->aux)) {
switch (off) {
@@ -7257,6 +7311,11 @@ static bool sk_msg_is_valid_access(int off, int size,
if (size != sizeof(__u64))
return false;
break;
+ case offsetof(struct sk_msg_md, sk):
+ if (size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_SOCKET;
+ break;
case bpf_ctx_range(struct sk_msg_md, family):
case bpf_ctx_range(struct sk_msg_md, remote_ip4):
case bpf_ctx_range(struct sk_msg_md, local_ip4):
@@ -7872,6 +7931,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
skc_state),
target_size));
break;
+ case offsetof(struct bpf_sock, rx_queue_mapping):
+#ifdef CONFIG_XPS
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_rx_queue_mapping,
+ sizeof_field(struct sock,
+ sk_rx_queue_mapping),
+ target_size));
+ *insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
+ 1);
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
+#else
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
+ *target_size = 2;
+#endif
+ break;
}
return insn - insn_buf;
@@ -7942,6 +8018,16 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
offsetof(struct xdp_rxq_info,
queue_index));
break;
+ case offsetof(struct xdp_md, egress_ifindex):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, txq));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct xdp_txq_info, dev));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct net_device, ifindex));
+ break;
}
return insn - insn_buf;
@@ -8593,6 +8679,12 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_sg, size));
break;
+
+ case offsetof(struct sk_msg_md, sk):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg, sk));
+ break;
}
return insn - insn_buf;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0aeb33572feb..d02df0b6d0d9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -31,8 +31,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_labels.h>
#endif
-
-static DEFINE_MUTEX(flow_dissector_mutex);
+#include <linux/bpf-netns.h>
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
@@ -70,54 +69,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
}
EXPORT_SYMBOL(skb_flow_dissector_init);
-int skb_flow_dissector_prog_query(const union bpf_attr *attr,
- union bpf_attr __user *uattr)
-{
- __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
- u32 prog_id, prog_cnt = 0, flags = 0;
- struct bpf_prog *attached;
- struct net *net;
-
- if (attr->query.query_flags)
- return -EINVAL;
-
- net = get_net_ns_by_fd(attr->query.target_fd);
- if (IS_ERR(net))
- return PTR_ERR(net);
-
- rcu_read_lock();
- attached = rcu_dereference(net->flow_dissector_prog);
- if (attached) {
- prog_cnt = 1;
- prog_id = attached->aux->id;
- }
- rcu_read_unlock();
-
- put_net(net);
-
- if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
- return -EFAULT;
- if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
- return -EFAULT;
-
- if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
- return 0;
-
- if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
- return -EFAULT;
-
- return 0;
-}
-
-int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
- struct bpf_prog *prog)
+#ifdef CONFIG_BPF_SYSCALL
+int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog)
{
+ enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
struct bpf_prog *attached;
- struct net *net;
- int ret = 0;
-
- net = current->nsproxy->net_ns;
- mutex_lock(&flow_dissector_mutex);
if (net == &init_net) {
/* BPF flow dissector in the root namespace overrides
@@ -130,70 +86,29 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
for_each_net(ns) {
if (ns == &init_net)
continue;
- if (rcu_access_pointer(ns->flow_dissector_prog)) {
- ret = -EEXIST;
- goto out;
- }
+ if (rcu_access_pointer(ns->bpf.progs[type]))
+ return -EEXIST;
}
} else {
/* Make sure root flow dissector is not attached
* when attaching to the non-root namespace.
*/
- if (rcu_access_pointer(init_net.flow_dissector_prog)) {
- ret = -EEXIST;
- goto out;
- }
+ if (rcu_access_pointer(init_net.bpf.progs[type]))
+ return -EEXIST;
}
- attached = rcu_dereference_protected(net->flow_dissector_prog,
- lockdep_is_held(&flow_dissector_mutex));
- if (attached == prog) {
+ attached = rcu_dereference_protected(net->bpf.progs[type],
+ lockdep_is_held(&netns_bpf_mutex));
+ if (attached == prog)
/* The same program cannot be attached twice */
- ret = -EINVAL;
- goto out;
- }
- rcu_assign_pointer(net->flow_dissector_prog, prog);
+ return -EINVAL;
+
+ rcu_assign_pointer(net->bpf.progs[type], prog);
if (attached)
bpf_prog_put(attached);
-out:
- mutex_unlock(&flow_dissector_mutex);
- return ret;
-}
-
-static int flow_dissector_bpf_prog_detach(struct net *net)
-{
- struct bpf_prog *attached;
-
- mutex_lock(&flow_dissector_mutex);
- attached = rcu_dereference_protected(net->flow_dissector_prog,
- lockdep_is_held(&flow_dissector_mutex));
- if (!attached) {
- mutex_unlock(&flow_dissector_mutex);
- return -ENOENT;
- }
- RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
- bpf_prog_put(attached);
- mutex_unlock(&flow_dissector_mutex);
return 0;
}
-
-int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
-{
- return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns);
-}
-
-static void __net_exit flow_dissector_pernet_pre_exit(struct net *net)
-{
- /* We're not racing with attach/detach because there are no
- * references to netns left when pre_exit gets called.
- */
- if (rcu_access_pointer(net->flow_dissector_prog))
- flow_dissector_bpf_prog_detach(net);
-}
-
-static struct pernet_operations flow_dissector_pernet_ops __net_initdata = {
- .pre_exit = flow_dissector_pernet_pre_exit,
-};
+#endif /* CONFIG_BPF_SYSCALL */
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@ -1044,11 +959,13 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net);
if (net) {
+ enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
+
rcu_read_lock();
- attached = rcu_dereference(init_net.flow_dissector_prog);
+ attached = rcu_dereference(init_net.bpf.progs[type]);
if (!attached)
- attached = rcu_dereference(net->flow_dissector_prog);
+ attached = rcu_dereference(net->bpf.progs[type]);
if (attached) {
struct bpf_flow_keys flow_keys;
@@ -1869,7 +1786,6 @@ static int __init init_default_flow_dissectors(void)
skb_flow_dissector_init(&flow_keys_basic_dissector,
flow_keys_basic_dissector_keys,
ARRAY_SIZE(flow_keys_basic_dissector_keys));
-
- return register_pernet_subsys(&flow_dissector_pernet_ops);
+ return 0;
}
core_initcall(init_default_flow_dissectors);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index c479372f2cd2..351afbf6bfba 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -7,6 +7,7 @@
#include <net/sock.h>
#include <net/tcp.h>
+#include <net/tls.h>
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
@@ -682,13 +683,75 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
return container_of(parser, struct sk_psock, parser);
}
-static void sk_psock_verdict_apply(struct sk_psock *psock,
- struct sk_buff *skb, int verdict)
+static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
bool ingress;
+ sk_other = tcp_skb_bpf_redirect_fetch(skb);
+ if (unlikely(!sk_other)) {
+ kfree_skb(skb);
+ return;
+ }
+ psock_other = sk_psock(sk_other);
+ if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
+ !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ ingress = tcp_skb_bpf_ingress(skb);
+ if ((!ingress && sock_writeable(sk_other)) ||
+ (ingress &&
+ atomic_read(&sk_other->sk_rmem_alloc) <=
+ sk_other->sk_rcvbuf)) {
+ if (!ingress)
+ skb_set_owner_w(skb, sk_other);
+ skb_queue_tail(&psock_other->ingress_skb, skb);
+ schedule_work(&psock_other->work);
+ } else {
+ kfree_skb(skb);
+ }
+}
+
+static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
+ struct sk_buff *skb, int verdict)
+{
+ switch (verdict) {
+ case __SK_REDIRECT:
+ sk_psock_skb_redirect(psock, skb);
+ break;
+ case __SK_PASS:
+ case __SK_DROP:
+ default:
+ break;
+ }
+}
+
+int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
+{
+ struct bpf_prog *prog;
+ int ret = __SK_PASS;
+
+ rcu_read_lock();
+ prog = READ_ONCE(psock->progs.skb_verdict);
+ if (likely(prog)) {
+ tcp_skb_bpf_redirect_clear(skb);
+ ret = sk_psock_bpf_run(psock, prog, skb);
+ ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+ }
+ rcu_read_unlock();
+ sk_psock_tls_verdict_apply(psock, skb, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
+
+static void sk_psock_verdict_apply(struct sk_psock *psock,
+ struct sk_buff *skb, int verdict)
+{
+ struct sock *sk_other;
+
switch (verdict) {
case __SK_PASS:
sk_other = psock->sk;
@@ -707,25 +770,8 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
}
goto out_free;
case __SK_REDIRECT:
- sk_other = tcp_skb_bpf_redirect_fetch(skb);
- if (unlikely(!sk_other))
- goto out_free;
- psock_other = sk_psock(sk_other);
- if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
- !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
- goto out_free;
- ingress = tcp_skb_bpf_ingress(skb);
- if ((!ingress && sock_writeable(sk_other)) ||
- (ingress &&
- atomic_read(&sk_other->sk_rmem_alloc) <=
- sk_other->sk_rcvbuf)) {
- if (!ingress)
- skb_set_owner_w(skb, sk_other);
- skb_queue_tail(&psock_other->ingress_skb, skb);
- schedule_work(&psock_other->work);
- break;
- }
- /* fall-through */
+ sk_psock_skb_redirect(psock, skb);
+ break;
case __SK_DROP:
/* fall-through */
default:
@@ -779,9 +825,13 @@ static void sk_psock_strp_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
if (likely(psock)) {
- write_lock_bh(&sk->sk_callback_lock);
- strp_data_ready(&psock->parser.strp);
- write_unlock_bh(&sk->sk_callback_lock);
+ if (tls_sw_has_ctx_rx(sk)) {
+ psock->parser.saved_data_ready(sk);
+ } else {
+ write_lock_bh(&sk->sk_callback_lock);
+ strp_data_ready(&psock->parser.strp);
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
}
rcu_read_unlock();
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 61ec573221a6..6c4acf1f0220 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -594,13 +594,15 @@ out:
return ret;
}
-int sock_bindtoindex(struct sock *sk, int ifindex)
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
{
int ret;
- lock_sock(sk);
+ if (lock_sk)
+ lock_sock(sk);
ret = sock_bindtoindex_locked(sk, ifindex);
- release_sock(sk);
+ if (lock_sk)
+ release_sock(sk);
return ret;
}
@@ -646,7 +648,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
goto out;
}
- return sock_bindtoindex(sk, index);
+ return sock_bindtoindex(sk, index, true);
out:
#endif