diff options
Diffstat (limited to 'include/net/xfrm.h')
-rw-r--r-- | include/net/xfrm.h | 130 |
1 files changed, 100 insertions, 30 deletions
diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 93a9866ee481..1484dd15a369 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -51,8 +51,10 @@ #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) +#define XFRM_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.xfrm_statistics, field, val) #else #define XFRM_INC_STATS(net, field) ((void)(net)) +#define XFRM_ADD_STATS(net, field, val) ((void)(net)) #endif @@ -65,27 +67,27 @@ - instance of a transformer, struct xfrm_state (=SA) - template to clone xfrm_state, struct xfrm_tmpl - SPD is plain linear list of xfrm_policy rules, ordered by priority. + SPD is organized as hash table (for policies that meet minimum address prefix + length setting, net->xfrm.policy_hthresh). Other policies are stored in + lists, sorted into rbtree ordered by destination and source address networks. + See net/xfrm/xfrm_policy.c for details. + (To be compatible with existing pfkeyv2 implementations, many rules with priority of 0x7fffffff are allowed to exist and such rules are ordered in an unpredictable way, thanks to bsd folks.) - Lookup is plain linear search until the first match with selector. - If "action" is "block", then we prohibit the flow, otherwise: if "xfrms_nr" is zero, the flow passes untransformed. Otherwise, policy entry has list of up to XFRM_MAX_DEPTH transformations, described by templates xfrm_tmpl. Each template is resolved to a complete xfrm_state (see below) and we pack bundle of transformations - to a dst_entry returned to requestor. + to a dst_entry returned to requester. dst -. xfrm .-> xfrm_state #1 |---. child .-> dst -. xfrm .-> xfrm_state #2 |---. child .-> dst -. xfrm .-> xfrm_state #3 |---. child .-> NULL - Bundles are cached at xrfm_policy struct (field ->bundles). - Resolution of xrfm_tmpl ----------------------- @@ -182,10 +184,13 @@ struct xfrm_state { }; struct hlist_node byspi; struct hlist_node byseq; + struct hlist_node state_cache; + struct hlist_node state_cache_input; refcount_t refcnt; spinlock_t lock; + u32 pcpu_num; struct xfrm_id id; struct xfrm_selector sel; struct xfrm_mark mark; @@ -228,7 +233,10 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; + + /* NAT keepalive */ + u32 nat_keepalive_interval; /* seconds */ + time64_t nat_keepalive_expiration; /* Data for care-of address */ xfrm_address_t *coaddr; @@ -292,6 +300,7 @@ struct xfrm_state { /* Private data of this transformer, format is opaque, * interpreted by xfrm_type methods. */ void *data; + u8 dir; }; static inline struct net *xs_net(struct xfrm_state *x) @@ -524,11 +533,44 @@ struct xfrm_policy_queue { unsigned long timeout; }; +/** + * struct xfrm_policy - xfrm policy + * @xp_net: network namespace the policy lives in + * @bydst: hlist node for SPD hash table or rbtree list + * @byidx: hlist node for index hash table + * @state_cache_list: hlist head for policy cached xfrm states + * @lock: serialize changes to policy structure members + * @refcnt: reference count, freed once it reaches 0 + * @pos: kernel internal tie-breaker to determine age of policy + * @timer: timer + * @genid: generation, used to invalidate old policies + * @priority: priority, set by userspace + * @index: policy index (autogenerated) + * @if_id: virtual xfrm interface id + * @mark: packet mark + * @selector: selector + * @lft: liftime configuration data + * @curlft: liftime state + * @walk: list head on pernet policy list + * @polq: queue to hold packets while aqcuire operaion in progress + * @bydst_reinsert: policy tree node needs to be merged + * @type: XFRM_POLICY_TYPE_MAIN or _SUB + * @action: XFRM_POLICY_ALLOW or _BLOCK + * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP + * @xfrm_nr: number of used templates in @xfrm_vec + * @family: protocol family + * @security: SELinux security label + * @xfrm_vec: array of templates to resolve state + * @rcu: rcu head, used to defer memory release + * @xdo: hardware offload state + */ struct xfrm_policy { possible_net_t xp_net; struct hlist_node bydst; struct hlist_node byidx; + struct hlist_head state_cache_list; + /* This lock only affects elements except for entry. */ rwlock_t lock; refcount_t refcnt; @@ -553,7 +595,6 @@ struct xfrm_policy { u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; - struct hlist_node bydst_inexact_list; struct rcu_head rcu; struct xfrm_dev_offload xdo; @@ -1014,7 +1055,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { int link; /* ifindex of underlying L2 interface */ - u32 if_id; /* interface identifyer */ + u32 if_id; /* interface identifier */ bool collect_md; }; @@ -1181,9 +1222,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (xo) { x = xfrm_input_state(skb); - if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) - return (xo->flags & CRYPTO_DONE) && - (xo->status & CRYPTO_SUCCESS); + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) { + bool check = (xo->flags & CRYPTO_DONE) && + (xo->status & CRYPTO_SUCCESS); + + /* The packets here are plain ones and secpath was + * needed to indicate that hardware already handled + * them and there is no need to do nothing in addition. + * + * Consume secpath which was set by drivers. + */ + secpath_reset(skb); + return check; + } } return __xfrm_check_nopolicy(net, skb, dir) || @@ -1218,20 +1269,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1); } -int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, +int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse); -static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, +static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { - return __xfrm_decode_session(skb, fl, family, 0); + return __xfrm_decode_session(net, skb, fl, family, 0); } -static inline int xfrm_decode_session_reverse(struct sk_buff *skb, +static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { - return __xfrm_decode_session(skb, fl, family, 1); + return __xfrm_decode_session(net, skb, fl, family, 1); } int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); @@ -1307,7 +1358,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk { return 1; } -static inline int xfrm_decode_session_reverse(struct sk_buff *skb, +static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { @@ -1588,22 +1639,20 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, unsigned short family); int xfrm_state_check_expire(struct xfrm_state *x); +void xfrm_state_update_stats(struct net *net); #ifdef CONFIG_XFRM_OFFLOAD -static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) +static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) { struct xfrm_dev_offload *xdo = &x->xso; struct net_device *dev = READ_ONCE(xdo->dev); - if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) - return; - if (dev && dev->xfrmdev_ops && - dev->xfrmdev_ops->xdo_dev_state_update_curlft) - dev->xfrmdev_ops->xdo_dev_state_update_curlft(x); + dev->xfrmdev_ops->xdo_dev_state_update_stats) + dev->xfrmdev_ops->xdo_dev_state_update_stats(x); } #else -static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {} +static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {} #endif void xfrm_state_insert(struct xfrm_state *x); int xfrm_state_add(struct xfrm_state *x); @@ -1611,6 +1660,10 @@ int xfrm_state_update(struct xfrm_state *x); struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); +struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, + const xfrm_address_t *daddr, + __be32 spi, u8 proto, + unsigned short family); struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -1650,7 +1703,7 @@ struct xfrmk_spdinfo { u32 spdhmcnt; }; -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); @@ -1680,7 +1733,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); #endif void xfrm_local_error(struct sk_buff *skb, int mtu); -int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm4_transport_finish(struct sk_buff *skb, int async); @@ -1700,7 +1752,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t); int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -1723,6 +1774,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb); +struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen); #else @@ -1760,7 +1815,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack); int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi, struct netlink_ext_ack *extack); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, - u8 mode, u32 reqid, u32 if_id, u8 proto, + u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family); @@ -2155,7 +2210,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk) proto = sk->sk_protocol; if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; + return inet6_test_bit(DONTFRAG, sk); return false; } @@ -2177,4 +2232,19 @@ static inline int register_xfrm_interface_bpf(void) #endif +#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF) +int register_xfrm_state_bpf(void); +#else +static inline int register_xfrm_state_bpf(void) +{ + return 0; +} +#endif + +int xfrm_nat_keepalive_init(unsigned short family); +void xfrm_nat_keepalive_fini(unsigned short family); +int xfrm_nat_keepalive_net_init(struct net *net); +int xfrm_nat_keepalive_net_fini(struct net *net); +void xfrm_nat_keepalive_state_updated(struct xfrm_state *x); + #endif /* _NET_XFRM_H */ |