From 093b0f366567aa3fed85c316f832607069202b23 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:52 +0200 Subject: net: psample: add user cookie Add a user cookie to the sample metadata so that sample emitters can provide more contextual information to samples. If present, send the user cookie in a new attribute: PSAMPLE_ATTR_USER_COOKIE. Reviewed-by: Michal Kubiak Acked-by: Eelco Chaudron Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-2-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/psample.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e585db5bf2d2..e80637e1d97b 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -19,6 +19,7 @@ enum { PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */ PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ + PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ __PSAMPLE_ATTR_MAX }; -- cgit v1.2.3 From 7b1b2b60c63f070e0dfbe072ccaae13168b38d01 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:55 +0200 Subject: net: psample: allow using rate as probability Although not explicitly documented in the psample module itself, the definition of PSAMPLE_ATTR_SAMPLE_RATE seems inherited from act_sample. Quoting tc-sample(8): "RATE of 100 will lead to an average of one sampled packet out of every 100 observed." With this semantics, the rates that we can express with an unsigned 32-bits number are very unevenly distributed and concentrated towards "sampling few packets". For example, we can express a probability of 2.32E-8% but we cannot express anything between 100% and 50%. For sampling applications that are capable of sampling a decent amount of packets, this sampling rate semantics is not very useful. Add a new flag to the uAPI that indicates that the sampling rate is expressed in scaled probability, this is: - 0 is 0% probability, no packets get sampled. - U32_MAX is 100% probability, all packets get sampled. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-5-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 3 ++- include/uapi/linux/psample.h | 10 +++++++++- net/psample/psample.c | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/psample.h b/include/net/psample.h index 2ac71260a546..c52e9ebd88dd 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -24,7 +24,8 @@ struct psample_metadata { u8 out_tc_valid:1, out_tc_occ_valid:1, latency_valid:1, - unused:5; + rate_as_probability:1, + unused:4; const u8 *user_cookie; u32 user_cookie_len; }; diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e80637e1d97b..b765f0e81f20 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -8,7 +8,11 @@ enum { PSAMPLE_ATTR_ORIGSIZE, PSAMPLE_ATTR_SAMPLE_GROUP, PSAMPLE_ATTR_GROUP_SEQ, - PSAMPLE_ATTR_SAMPLE_RATE, + PSAMPLE_ATTR_SAMPLE_RATE, /* u32, ratio between observed and + * sampled packets or scaled probability + * if PSAMPLE_ATTR_SAMPLE_PROBABILITY + * is set. + */ PSAMPLE_ATTR_DATA, PSAMPLE_ATTR_GROUP_REFCOUNT, PSAMPLE_ATTR_TUNNEL, @@ -20,6 +24,10 @@ enum { PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ + PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in + * PSAMPLE_ATTR_SAMPLE_RATE as a + * probability scaled 0 - U32_MAX. + */ __PSAMPLE_ATTR_MAX }; diff --git a/net/psample/psample.c b/net/psample/psample.c index 1c76f3e48dcd..f48b5b9cd409 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -497,6 +497,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, md->user_cookie)) goto error; + if (md->rate_as_probability) + nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY); + genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); -- cgit v1.2.3 From aae0b82b46cb5004bdf82a000c004d69a0885c33 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:56 +0200 Subject: net: openvswitch: add psample action Add support for a new action: psample. This action accepts a u32 group id and a variable-length cookie and uses the psample multicast group to make the packet available for observability. The maximum length of the user-defined cookie is set to 16, same as tc_cookie, to discourage using cookies that will not be offloadable. Reviewed-by: Michal Kubiak Reviewed-by: Aaron Conole Reviewed-by: Ilya Maximets Acked-by: Eelco Chaudron Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-6-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ovs_flow.yaml | 17 +++++++++++ include/uapi/linux/openvswitch.h | 28 ++++++++++++++++++ net/openvswitch/Kconfig | 1 + net/openvswitch/actions.c | 48 +++++++++++++++++++++++++++++++ net/openvswitch/flow_netlink.c | 32 ++++++++++++++++++++- 5 files changed, 125 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 4fdfc6b5cae9..46f5d1cd8a5f 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -727,6 +727,12 @@ attribute-sets: name: dec-ttl type: nest nested-attributes: dec-ttl-attrs + - + name: psample + type: nest + nested-attributes: psample-attrs + doc: | + Sends a packet sample to psample for external observation. - name: tunnel-key-attrs enum-name: ovs-tunnel-key-attr @@ -938,6 +944,17 @@ attribute-sets: - name: gbp type: u32 + - + name: psample-attrs + enum-name: ovs-psample-attr + name-prefix: ovs-psample-attr- + attributes: + - + name: group + type: u32 + - + name: cookie + type: binary operations: name-prefix: ovs-flow-cmd- diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index efc82c318fa2..3dd653748725 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -914,6 +914,31 @@ struct check_pkt_len_arg { }; #endif +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16 +/** + * enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE + * action. + * + * @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the + * sample. + * @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that + * contains user-defined metadata. The maximum length is + * OVS_PSAMPLE_COOKIE_MAX_SIZE bytes. + * + * Sends the packet to the psample multicast group with the specified group and + * cookie. It is possible to combine this action with the + * %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample. + */ +enum ovs_psample_attr { + OVS_PSAMPLE_ATTR_GROUP = 1, /* u32 number. */ + OVS_PSAMPLE_ATTR_COOKIE, /* Optional, user specified cookie. */ + + /* private: */ + __OVS_PSAMPLE_ATTR_MAX +}; + +#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1) + /** * enum ovs_action_attr - Action types. * @@ -966,6 +991,8 @@ struct check_pkt_len_arg { * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. * @OVS_ACTION_ATTR_DROP: Explicit drop action. + * @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers + * via psample. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -1004,6 +1031,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ OVS_ACTION_ATTR_DROP, /* u32 error code. */ + OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 29a7081858cd..2535f3f9f462 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -10,6 +10,7 @@ config OPENVSWITCH (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ (!NF_NAT || NF_NAT) && \ (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) + depends on PSAMPLE || !PSAMPLE select LIBCRC32C select MPLS select NET_MPLS_GSO diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 964225580824..892d7e48fc5b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -24,6 +24,11 @@ #include #include #include + +#if IS_ENABLED(CONFIG_PSAMPLE) +#include +#endif + #include #include "datapath.h" @@ -1299,6 +1304,40 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key) return 0; } +#if IS_ENABLED(CONFIG_PSAMPLE) +static void execute_psample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{ + struct psample_group psample_group = {}; + struct psample_metadata md = {}; + const struct nlattr *a; + int rem; + + nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(a)) { + case OVS_PSAMPLE_ATTR_GROUP: + psample_group.group_num = nla_get_u32(a); + break; + + case OVS_PSAMPLE_ATTR_COOKIE: + md.user_cookie = nla_data(a); + md.user_cookie_len = nla_len(a); + break; + } + } + + psample_group.net = ovs_dp_get_net(dp); + md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex; + md.trunc_size = skb->len - OVS_CB(skb)->cutlen; + + psample_sample_packet(&psample_group, skb, 0, &md); +} +#else +static void execute_psample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{} +#endif + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1502,6 +1541,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, ovs_kfree_skb_reason(skb, reason); return 0; } + + case OVS_ACTION_ATTR_PSAMPLE: + execute_psample(dp, skb, a); + OVS_CB(skb)->cutlen = 0; + if (nla_is_last(a, rem)) { + consume_skb(skb); + return 0; + } + break; } if (unlikely(err)) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index f224d9bcea5e..c92bdc4dfe19 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_USERSPACE: case OVS_ACTION_ATTR_DROP: + case OVS_ACTION_ATTR_PSAMPLE: break; case OVS_ACTION_ATTR_CT: @@ -2409,7 +2410,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) /* Whenever new actions are added, the need to update this * function should be considered. */ - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25); if (!actions) return; @@ -3157,6 +3158,28 @@ static int validate_and_copy_check_pkt_len(struct net *net, return 0; } +static int validate_psample(const struct nlattr *attr) +{ + static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = { + [OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 }, + [OVS_PSAMPLE_ATTR_COOKIE] = { + .type = NLA_BINARY, + .len = OVS_PSAMPLE_COOKIE_MAX_SIZE, + }, + }; + struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1]; + int err; + + if (!IS_ENABLED(CONFIG_PSAMPLE)) + return -EOPNOTSUPP; + + err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL); + if (err) + return err; + + return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL; +} + static int copy_action(const struct nlattr *from, struct sw_flow_actions **sfa, bool log) { @@ -3212,6 +3235,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, [OVS_ACTION_ATTR_DROP] = sizeof(u32), + [OVS_ACTION_ATTR_PSAMPLE] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3490,6 +3514,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; break; + case OVS_ACTION_ATTR_PSAMPLE: + err = validate_psample(a); + if (err) + return err; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; -- cgit v1.2.3 From 71763d8a8203c28178d7be7f18af73d4dddb36ba Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:57 +0200 Subject: net: openvswitch: store sampling probability in cb. When a packet sample is observed, the sampling rate that was used is important to estimate the real frequency of such event. Store the probability of the parent sample action in the skb's cb area and use it in psample action to pass it down to psample module. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ilya Maximets Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-7-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 3 ++- net/openvswitch/actions.c | 20 +++++++++++++++++--- net/openvswitch/datapath.h | 3 +++ net/openvswitch/vport.c | 1 + 4 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 3dd653748725..3a701bd1f31b 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -649,7 +649,8 @@ enum ovs_flow_attr { * Actions are passed as nested attributes. * * Executes the specified actions with the given probability on a per-packet - * basis. + * basis. Nested actions will be able to access the probability value of the + * parent @OVS_ACTION_ATTR_SAMPLE. */ enum ovs_sample_attr { OVS_SAMPLE_ATTR_UNSPEC, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 892d7e48fc5b..101f9a23792c 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1048,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb, struct nlattr *sample_arg; int rem = nla_len(attr); const struct sample_arg *arg; + u32 init_probability; bool clone_flow_key; + int err; /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ sample_arg = nla_data(attr); arg = nla_data(sample_arg); actions = nla_next(sample_arg, &rem); + init_probability = OVS_CB(skb)->probability; if ((arg->probability != U32_MAX) && (!arg->probability || get_random_u32() > arg->probability)) { @@ -1062,9 +1065,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb, return 0; } + OVS_CB(skb)->probability = arg->probability; + clone_flow_key = !arg->exec; - return clone_execute(dp, skb, key, 0, actions, rem, last, - clone_flow_key); + err = clone_execute(dp, skb, key, 0, actions, rem, last, + clone_flow_key); + + if (!last) + OVS_CB(skb)->probability = init_probability; + + return err; } /* When 'last' is true, clone() should always consume the 'skb'. @@ -1311,6 +1321,7 @@ static void execute_psample(struct datapath *dp, struct sk_buff *skb, struct psample_group psample_group = {}; struct psample_metadata md = {}; const struct nlattr *a; + u32 rate; int rem; nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { @@ -1329,8 +1340,11 @@ static void execute_psample(struct datapath *dp, struct sk_buff *skb, psample_group.net = ovs_dp_get_net(dp); md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex; md.trunc_size = skb->len - OVS_CB(skb)->cutlen; + md.rate_as_probability = 1; + + rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX; - psample_sample_packet(&psample_group, skb, 0, &md); + psample_sample_packet(&psample_group, skb, rate, &md); } #else static void execute_psample(struct datapath *dp, struct sk_buff *skb, diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 0cd29971a907..9ca6231ea647 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -115,12 +115,15 @@ struct datapath { * fragmented. * @acts_origlen: The netlink size of the flow actions applied to this skb. * @cutlen: The number of bytes from the packet end to be removed. + * @probability: The sampling probability that was applied to this skb; 0 means + * no sampling has occurred; U32_MAX means 100% probability. */ struct ovs_skb_cb { struct vport *input_vport; u16 mru; u16 acts_origlen; u32 cutlen; + u32 probability; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 972ae01a70f7..8732f6e51ae5 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; OVS_CB(skb)->cutlen = 0; + OVS_CB(skb)->probability = 0; if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { u32 mark; -- cgit v1.2.3