diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bpf-cgroup.h | 45 | ||||
-rw-r--r-- | include/linux/bpf.h | 2 | ||||
-rw-r--r-- | include/linux/bpf_types.h | 1 | ||||
-rw-r--r-- | include/linux/filter.h | 13 | ||||
-rw-r--r-- | include/linux/list.h | 14 | ||||
-rw-r--r-- | include/net/tcp.h | 8 | ||||
-rw-r--r-- | include/net/xdp_sock.h | 27 | ||||
-rw-r--r-- | include/trace/events/xdp.h | 34 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 33 | ||||
-rw-r--r-- | include/uapi/linux/if_xdp.h | 8 |
10 files changed, 175 insertions, 10 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index bd79ae32909a..169fd25f6bc2 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -124,6 +124,14 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, loff_t *ppos, void **new_buf, enum bpf_attach_type type); +int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, + int *optname, char __user *optval, + int *optlen, char **kernel_optval); +int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, + int optname, char __user *optval, + int __user *optlen, int max_optlen, + int retval); + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -286,6 +294,38 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, __ret; \ }) +#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ + kernel_optval) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ + optname, optval, \ + optlen, \ + kernel_optval); \ + __ret; \ +}) + +#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + get_user(__ret, optlen); \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ + max_optlen, retval) \ +({ \ + int __ret = retval; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \ + optname, optval, \ + optlen, max_optlen, \ + retval); \ + __ret; \ +}) + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -357,6 +397,11 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) +#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ + optlen, max_optlen, retval) ({ retval; }) +#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ + kernel_optval) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a62e7889b0b6..18f4cc2c6acd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -518,6 +518,7 @@ struct bpf_prog_array { struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); +bool bpf_prog_array_is_empty(struct bpf_prog_array *array); int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, __u32 __user *prog_ids, u32 cnt); @@ -1051,6 +1052,7 @@ extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; +extern const struct bpf_func_proto bpf_tcp_sock_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 5a9975678d6f..eec5aeeeaf92 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -30,6 +30,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt) #endif #ifdef CONFIG_BPF_LIRC_MODE2 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) diff --git a/include/linux/filter.h b/include/linux/filter.h index 43b45d6db36d..1fe53e78c7e3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -578,8 +578,9 @@ struct bpf_skb_data_end { }; struct bpf_redirect_info { - u32 ifindex; u32 flags; + u32 tgt_index; + void *tgt_value; struct bpf_map *map; struct bpf_map *map_to_flush; u32 kern_flags; @@ -1199,4 +1200,14 @@ struct bpf_sysctl_kern { u64 tmp_reg; }; +struct bpf_sockopt_kern { + struct sock *sk; + u8 *optval; + u8 *optval_end; + s32 level; + s32 optname; + s32 optlen; + s32 retval; +}; + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/list.h b/include/linux/list.h index e951228db4b2..85c92555e31f 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -106,6 +106,20 @@ static inline void __list_del(struct list_head * prev, struct list_head * next) WRITE_ONCE(prev->next, next); } +/* + * Delete a list entry and clear the 'prev' pointer. + * + * This is a special-purpose list clearing method used in the networking code + * for lists allocated as per-cpu, where we don't want to incur the extra + * WRITE_ONCE() overhead of a regular list_del_init(). The code that uses this + * needs to check the node 'prev' pointer instead of calling list_empty(). + */ +static inline void __list_del_clearprev(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = NULL; +} + /** * list_del - deletes entry from list. * @entry: the element to delete from the list. diff --git a/include/net/tcp.h b/include/net/tcp.h index 9d36cc88d043..e16d8a3fd3b4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2221,6 +2221,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } +static inline void tcp_bpf_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG)) + tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL); +} + #if IS_ENABLED(CONFIG_SMC) extern struct static_key_false tcp_have_smc; #endif diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index ae0f368a62bb..057b159ff8b9 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -77,10 +77,11 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); void xsk_flush(struct xdp_sock *xs); bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); /* Used from netdev driver */ +bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt); u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); -bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); +bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc); void xsk_umem_consume_tx_done(struct xdp_umem *umem); struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries); struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, @@ -99,6 +100,16 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) } /* Reuse-queue aware version of FILL queue helpers */ +static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + if (rq->length >= cnt) + return true; + + return xsk_umem_has_addrs(umem, cnt - rq->length); +} + static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) { struct xdp_umem_fq_reuse *rq = umem->fq_reuse; @@ -146,6 +157,11 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) return false; } +static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt) +{ + return false; +} + static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) { return NULL; @@ -159,8 +175,8 @@ static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) { } -static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, - u32 *len) +static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, + struct xdp_desc *desc) { return false; } @@ -200,6 +216,11 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) return 0; } +static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt) +{ + return false; +} + static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) { return NULL; diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index bb5e380e2ef3..68899fdc985b 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -50,6 +50,35 @@ TRACE_EVENT(xdp_exception, __entry->ifindex) ); +TRACE_EVENT(xdp_bulk_tx, + + TP_PROTO(const struct net_device *dev, + int sent, int drops, int err), + + TP_ARGS(dev, sent, drops, err), + + TP_STRUCT__entry( + __field(int, ifindex) + __field(u32, act) + __field(int, drops) + __field(int, sent) + __field(int, err) + ), + + TP_fast_assign( + __entry->ifindex = dev->ifindex; + __entry->act = XDP_TX; + __entry->drops = drops; + __entry->sent = sent; + __entry->err = err; + ), + + TP_printk("ifindex=%d action=%s sent=%d drops=%d err=%d", + __entry->ifindex, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->sent, __entry->drops, __entry->err) +); + DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, @@ -146,9 +175,8 @@ struct _bpf_dtab_netdev { #endif /* __DEVMAP_OBJ_TYPE */ #define devmap_ifindex(fwd, map) \ - (!fwd ? 0 : \ - ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ - ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)) + ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ + ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b077507efa3f..ead27aebf491 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -170,6 +170,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + BPF_PROG_TYPE_CGROUP_SOCKOPT, }; enum bpf_attach_type { @@ -194,6 +195,8 @@ enum bpf_attach_type { BPF_CGROUP_SYSCTL, BPF_CGROUP_UDP4_RECVMSG, BPF_CGROUP_UDP6_RECVMSG, + BPF_CGROUP_GETSOCKOPT, + BPF_CGROUP_SETSOCKOPT, __MAX_BPF_ATTACH_TYPE }; @@ -1568,8 +1571,11 @@ union bpf_attr { * but this is only implemented for native XDP (with driver * support) as of this writing). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The lower two bits of *flags* are used as the return code if + * the map lookup fails. This is so that the return value can be + * one of the XDP program return codes up to XDP_TX, as chosen by + * the caller. Any higher bits in the *flags* argument must be + * unset. * * When used to redirect packets to net devices, this helper * provides a high performance increase over **bpf_redirect**\ (). @@ -1764,6 +1770,7 @@ union bpf_attr { * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) * * Therefore, this function can be used to clear a callback flag by * setting the appropriate bit to zero. e.g. to disable the RTO @@ -3066,6 +3073,12 @@ struct bpf_tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups + * total number of DSACK blocks received + */ + __u32 delivered; /* Total data packets delivered incl. rexmits */ + __u32 delivered_ce; /* Like the above but only ECE marked packets */ + __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ }; struct bpf_sock_tuple { @@ -3308,7 +3321,8 @@ struct bpf_sock_ops { #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) #define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently +#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently * supported cb flags */ @@ -3363,6 +3377,8 @@ enum { BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after * socket transition to LISTEN state. */ + BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect @@ -3541,4 +3557,15 @@ struct bpf_sysctl { */ }; +struct bpf_sockopt { + __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(void *, optval); + __bpf_md_ptr(void *, optval_end); + + __s32 level; + __s32 optname; + __s32 optlen; + __s32 retval; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index caed8b1614ff..faaa5ca2a117 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -46,6 +46,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_FILL_RING 5 #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 +#define XDP_OPTIONS 8 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ @@ -60,6 +61,13 @@ struct xdp_statistics { __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ }; +struct xdp_options { + __u32 flags; +}; + +/* Flags for the flags field of struct xdp_options */ +#define XDP_OPTIONS_ZEROCOPY (1 << 0) + /* Pgoff for mmaping the rings */ #define XDP_PGOFF_RX_RING 0 #define XDP_PGOFF_TX_RING 0x80000000 |