summaryrefslogtreecommitdiff
path: root/include/net/xfrm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/xfrm.h')
-rw-r--r--include/net/xfrm.h130
1 files changed, 100 insertions, 30 deletions
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 93a9866ee481..1484dd15a369 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -51,8 +51,10 @@
#ifdef CONFIG_XFRM_STATISTICS
#define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
+#define XFRM_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.xfrm_statistics, field, val)
#else
#define XFRM_INC_STATS(net, field) ((void)(net))
+#define XFRM_ADD_STATS(net, field, val) ((void)(net))
#endif
@@ -65,27 +67,27 @@
- instance of a transformer, struct xfrm_state (=SA)
- template to clone xfrm_state, struct xfrm_tmpl
- SPD is plain linear list of xfrm_policy rules, ordered by priority.
+ SPD is organized as hash table (for policies that meet minimum address prefix
+ length setting, net->xfrm.policy_hthresh). Other policies are stored in
+ lists, sorted into rbtree ordered by destination and source address networks.
+ See net/xfrm/xfrm_policy.c for details.
+
(To be compatible with existing pfkeyv2 implementations,
many rules with priority of 0x7fffffff are allowed to exist and
such rules are ordered in an unpredictable way, thanks to bsd folks.)
- Lookup is plain linear search until the first match with selector.
-
If "action" is "block", then we prohibit the flow, otherwise:
if "xfrms_nr" is zero, the flow passes untransformed. Otherwise,
policy entry has list of up to XFRM_MAX_DEPTH transformations,
described by templates xfrm_tmpl. Each template is resolved
to a complete xfrm_state (see below) and we pack bundle of transformations
- to a dst_entry returned to requestor.
+ to a dst_entry returned to requester.
dst -. xfrm .-> xfrm_state #1
|---. child .-> dst -. xfrm .-> xfrm_state #2
|---. child .-> dst -. xfrm .-> xfrm_state #3
|---. child .-> NULL
- Bundles are cached at xrfm_policy struct (field ->bundles).
-
Resolution of xrfm_tmpl
-----------------------
@@ -182,10 +184,13 @@ struct xfrm_state {
};
struct hlist_node byspi;
struct hlist_node byseq;
+ struct hlist_node state_cache;
+ struct hlist_node state_cache_input;
refcount_t refcnt;
spinlock_t lock;
+ u32 pcpu_num;
struct xfrm_id id;
struct xfrm_selector sel;
struct xfrm_mark mark;
@@ -228,7 +233,10 @@ struct xfrm_state {
/* Data for encapsulator */
struct xfrm_encap_tmpl *encap;
- struct sock __rcu *encap_sk;
+
+ /* NAT keepalive */
+ u32 nat_keepalive_interval; /* seconds */
+ time64_t nat_keepalive_expiration;
/* Data for care-of address */
xfrm_address_t *coaddr;
@@ -292,6 +300,7 @@ struct xfrm_state {
/* Private data of this transformer, format is opaque,
* interpreted by xfrm_type methods. */
void *data;
+ u8 dir;
};
static inline struct net *xs_net(struct xfrm_state *x)
@@ -524,11 +533,44 @@ struct xfrm_policy_queue {
unsigned long timeout;
};
+/**
+ * struct xfrm_policy - xfrm policy
+ * @xp_net: network namespace the policy lives in
+ * @bydst: hlist node for SPD hash table or rbtree list
+ * @byidx: hlist node for index hash table
+ * @state_cache_list: hlist head for policy cached xfrm states
+ * @lock: serialize changes to policy structure members
+ * @refcnt: reference count, freed once it reaches 0
+ * @pos: kernel internal tie-breaker to determine age of policy
+ * @timer: timer
+ * @genid: generation, used to invalidate old policies
+ * @priority: priority, set by userspace
+ * @index: policy index (autogenerated)
+ * @if_id: virtual xfrm interface id
+ * @mark: packet mark
+ * @selector: selector
+ * @lft: liftime configuration data
+ * @curlft: liftime state
+ * @walk: list head on pernet policy list
+ * @polq: queue to hold packets while aqcuire operaion in progress
+ * @bydst_reinsert: policy tree node needs to be merged
+ * @type: XFRM_POLICY_TYPE_MAIN or _SUB
+ * @action: XFRM_POLICY_ALLOW or _BLOCK
+ * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP
+ * @xfrm_nr: number of used templates in @xfrm_vec
+ * @family: protocol family
+ * @security: SELinux security label
+ * @xfrm_vec: array of templates to resolve state
+ * @rcu: rcu head, used to defer memory release
+ * @xdo: hardware offload state
+ */
struct xfrm_policy {
possible_net_t xp_net;
struct hlist_node bydst;
struct hlist_node byidx;
+ struct hlist_head state_cache_list;
+
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;
@@ -553,7 +595,6 @@ struct xfrm_policy {
u16 family;
struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
- struct hlist_node bydst_inexact_list;
struct rcu_head rcu;
struct xfrm_dev_offload xdo;
@@ -1014,7 +1055,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
struct xfrm_if_parms {
int link; /* ifindex of underlying L2 interface */
- u32 if_id; /* interface identifyer */
+ u32 if_id; /* interface identifier */
bool collect_md;
};
@@ -1181,9 +1222,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
if (xo) {
x = xfrm_input_state(skb);
- if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
- return (xo->flags & CRYPTO_DONE) &&
- (xo->status & CRYPTO_SUCCESS);
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
+ bool check = (xo->flags & CRYPTO_DONE) &&
+ (xo->status & CRYPTO_SUCCESS);
+
+ /* The packets here are plain ones and secpath was
+ * needed to indicate that hardware already handled
+ * them and there is no need to do nothing in addition.
+ *
+ * Consume secpath which was set by drivers.
+ */
+ secpath_reset(skb);
+ return check;
+ }
}
return __xfrm_check_nopolicy(net, skb, dir) ||
@@ -1218,20 +1269,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1);
}
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse);
-static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family)
{
- return __xfrm_decode_session(skb, fl, family, 0);
+ return __xfrm_decode_session(net, skb, fl, family, 0);
}
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
- return __xfrm_decode_session(skb, fl, family, 1);
+ return __xfrm_decode_session(net, skb, fl, family, 1);
}
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
@@ -1307,7 +1358,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
{
return 1;
}
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
@@ -1588,22 +1639,20 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
unsigned short family);
int xfrm_state_check_expire(struct xfrm_state *x);
+void xfrm_state_update_stats(struct net *net);
#ifdef CONFIG_XFRM_OFFLOAD
-static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x)
+static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
{
struct xfrm_dev_offload *xdo = &x->xso;
struct net_device *dev = READ_ONCE(xdo->dev);
- if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
- return;
-
if (dev && dev->xfrmdev_ops &&
- dev->xfrmdev_ops->xdo_dev_state_update_curlft)
- dev->xfrmdev_ops->xdo_dev_state_update_curlft(x);
+ dev->xfrmdev_ops->xdo_dev_state_update_stats)
+ dev->xfrmdev_ops->xdo_dev_state_update_stats(x);
}
#else
-static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {}
+static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {}
#endif
void xfrm_state_insert(struct xfrm_state *x);
int xfrm_state_add(struct xfrm_state *x);
@@ -1611,6 +1660,10 @@ int xfrm_state_update(struct xfrm_state *x);
struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family);
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family);
struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@@ -1650,7 +1703,7 @@ struct xfrmk_spdinfo {
u32 spdhmcnt;
};
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_delete(struct xfrm_state *x);
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
@@ -1680,7 +1733,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
#endif
void xfrm_local_error(struct sk_buff *skb, int mtu);
-int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int xfrm4_transport_finish(struct sk_buff *skb, int async);
@@ -1700,7 +1752,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
-int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t);
int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
@@ -1723,6 +1774,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
int optlen);
#else
@@ -1760,7 +1815,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
struct netlink_ext_ack *extack);
struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
- u8 mode, u32 reqid, u32 if_id, u8 proto,
+ u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create,
unsigned short family);
@@ -2155,7 +2210,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
proto = sk->sk_protocol;
if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
- return inet6_sk(sk)->dontfrag;
+ return inet6_test_bit(DONTFRAG, sk);
return false;
}
@@ -2177,4 +2232,19 @@ static inline int register_xfrm_interface_bpf(void)
#endif
+#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
+int register_xfrm_state_bpf(void);
+#else
+static inline int register_xfrm_state_bpf(void)
+{
+ return 0;
+}
+#endif
+
+int xfrm_nat_keepalive_init(unsigned short family);
+void xfrm_nat_keepalive_fini(unsigned short family);
+int xfrm_nat_keepalive_net_init(struct net *net);
+int xfrm_nat_keepalive_net_fini(struct net *net);
+void xfrm_nat_keepalive_state_updated(struct xfrm_state *x);
+
#endif /* _NET_XFRM_H */