From 537cf4e3cc2f6cc9088dcd6162de573f603adc29 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 20 Nov 2020 12:53:39 +0100 Subject: xsk: Fix umem cleanup bug at socket destruct Fix a bug that is triggered when a partially setup socket is destroyed. For a fully setup socket, a socket that has been bound to a device, the cleanup of the umem is performed at the end of the buffer pool's cleanup work queue item. This has to be performed in a work queue, and not in RCU cleanup, as it is doing a vunmap that cannot execute in interrupt context. However, when a socket has only been partially set up so that a umem has been created but the buffer pool has not, the code erroneously directly calls the umem cleanup function instead of using a work queue, and this leads to a BUG_ON() in vunmap(). As there in this case is no buffer pool, we cannot use its work queue, so we need to introduce a work queue for the umem and schedule this for the cleanup. So in the case there is no pool, we are going to use the umem's own work queue to schedule the cleanup. But if there is a pool, the cleanup of the umem is still being performed by the pool's work queue, as it is important that the umem is cleaned up after the pool. Fixes: e5e1a4bc916d ("xsk: Fix possible memory leak at socket close") Reported-by: Marek Majtyka Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Tested-by: Marek Majtyka Link: https://lore.kernel.org/bpf/1605873219-21629-1-git-send-email-magnus.karlsson@gmail.com --- include/net/xdp_sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 1a9559c0cbdd..4f4e93bf814c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -31,6 +31,7 @@ struct xdp_umem { struct page **pgs; int id; struct list_head xsk_dma_list; + struct work_struct work; }; struct xsk_map { -- cgit v1.2.3 From 36ccdf85829a7dd6936dba5d02fa50138471f0d3 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Mon, 23 Nov 2020 18:56:00 +0100 Subject: net, xsk: Avoid taking multiple skbuff references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") addressed the problem that packets were discarded from the Tx AF_XDP ring, when the driver returned NETDEV_TX_BUSY. Part of the fix was bumping the skbuff reference count, so that the buffer would not be freed by dev_direct_xmit(). A reference count larger than one means that the skbuff is "shared", which is not the case. If the "shared" skbuff is sent to the generic XDP receive path, netif_receive_generic_xdp(), and pskb_expand_head() is entered the BUG_ON(skb_shared(skb)) will trigger. This patch adds a variant to dev_direct_xmit(), __dev_direct_xmit(), where a user can select the skbuff free policy. This allows AF_XDP to avoid bumping the reference count, but still keep the NETDEV_TX_BUSY behavior. Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") Reported-by: Yonghong Song Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201123175600.146255-1-bjorn.topel@gmail.com --- include/linux/netdevice.h | 14 +++++++++++++- net/core/dev.c | 8 ++------ net/xdp/xsk.c | 8 +------- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..76775abf259d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2813,9 +2813,21 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); + int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); + +static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +{ + int ret; + + ret = __dev_direct_xmit(skb, queue_id); + if (!dev_xmit_complete(ret)) + kfree_skb(skb); + return ret; +} + int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); diff --git a/net/core/dev.c b/net/core/dev.c index 82dc6b48e45f..8588ade790cb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4180,7 +4180,7 @@ int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) } EXPORT_SYMBOL(dev_queue_xmit_accel); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { struct net_device *dev = skb->dev; struct sk_buff *orig_skb = skb; @@ -4210,17 +4210,13 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) dev_xmit_recursion_dec(); local_bh_enable(); - - if (!dev_xmit_complete(ret)) - kfree_skb(skb); - return ret; drop: atomic_long_inc(&dev->tx_dropped); kfree_skb_list(skb); return NET_XMIT_DROP; } -EXPORT_SYMBOL(dev_direct_xmit); +EXPORT_SYMBOL(__dev_direct_xmit); /************************************************************************* * Receiver routines diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5a6cdf7b320d..b7b039bd9d03 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -411,11 +411,7 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; - /* Hinder dev_direct_xmit from freeing the packet and - * therefore completing it in the destructor - */ - refcount_inc(&skb->users); - err = dev_direct_xmit(skb, xs->queue_id); + err = __dev_direct_xmit(skb, xs->queue_id); if (err == NETDEV_TX_BUSY) { /* Tell user-space to retry the send */ skb->destructor = sock_wfree; @@ -429,12 +425,10 @@ static int xsk_generic_xmit(struct sock *sk) /* Ignore NET_XMIT_CN as packet might have been sent */ if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ - kfree_skb(skb); err = -EBUSY; goto out; } - consume_skb(skb); sent_frame = true; } -- cgit v1.2.3 From 3c78e9e0d33a27ab8050e4492c03c6a1f8d0ed6b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Nov 2020 23:50:07 +0100 Subject: netfilter: nftables_offload: set address type in control dissector This patch adds nft_flow_rule_set_addr_type() to set the address type from the nft_payload expression accordingly. If the address type is not set in the control dissector then a rule that matches either on source or destination IP address does not work. After this patch, nft hardware offload generates the flow dissector configuration as tc-flower does to match on an IP address. This patch has been also tested functionally to make sure packets are filtered out by the NIC. This is also getting the code aligned with the existing netfilter flow offload infrastructure which is also setting the control dissector. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_offload.h | 4 ++++ net/netfilter/nf_tables_offload.c | 17 +++++++++++++++++ net/netfilter/nft_payload.c | 4 ++++ 3 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index ea7d1d78b92d..bddd34c5bd79 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -37,6 +37,7 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx, struct nft_flow_key { struct flow_dissector_key_basic basic; + struct flow_dissector_key_control control; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; @@ -62,6 +63,9 @@ struct nft_flow_rule { #define NFT_OFFLOAD_F_ACTION (1 << 0) +void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type); + struct nft_rule; struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule); void nft_flow_rule_destroy(struct nft_flow_rule *flow); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9f625724a20f..9ae14270c543 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -28,6 +28,23 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) return flow; } +void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type) +{ + struct nft_flow_match *match = &flow->match; + struct nft_flow_key *mask = &match->mask; + struct nft_flow_key *key = &match->key; + + if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) + return; + + key->control.addr_type = addr_type; + mask->control.addr_type = 0xffff; + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL); + match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] = + offsetof(struct nft_flow_key, control); +} + struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule) { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index dcd3c7b8a367..bbf811d030d5 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -244,6 +244,7 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src, sizeof(struct in_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, daddr): if (priv->len != sizeof(struct in_addr)) @@ -251,6 +252,7 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst, sizeof(struct in_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, protocol): if (priv->len != sizeof(__u8)) @@ -280,6 +282,7 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src, sizeof(struct in6_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, daddr): if (priv->len != sizeof(struct in6_addr)) @@ -287,6 +290,7 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst, sizeof(struct in6_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, nexthdr): if (priv->len != sizeof(__u8)) -- cgit v1.2.3 From a5d45bc0dc50f9dd83703510e9804d813a9cac32 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Nov 2020 23:50:17 +0100 Subject: netfilter: nftables_offload: build mask based from the matching bytes Userspace might match on prefix bytes of header fields if they are on the byte boundary, this requires that the mask is adjusted accordingly. Use NFT_OFFLOAD_MATCH_EXACT() for meta since prefix byte matching is not allowed for this type of selector. The bitwise expression might be optimized out by userspace, hence the kernel needs to infer the prefix from the number of payload bytes to match on. This patch adds nft_payload_offload_mask() to calculate the bitmask to match on the prefix. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_offload.h | 3 ++ net/netfilter/nft_cmp.c | 8 ++-- net/netfilter/nft_meta.c | 16 ++++---- net/netfilter/nft_payload.c | 66 +++++++++++++++++++++++-------- 4 files changed, 64 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index bddd34c5bd79..1d34fe154fe0 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -78,6 +78,9 @@ int nft_flow_rule_offload_commit(struct net *net); offsetof(struct nft_flow_key, __base.__field); \ (__reg)->len = __len; \ (__reg)->key = __key; \ + +#define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg) \ + NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ memset(&(__reg)->mask, 0xff, (__reg)->len); int nft_chain_offload_priority(struct nft_base_chain *basechain); diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index bc079d68a536..00e563a72d3d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -123,11 +123,11 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; - if (priv->op != NFT_CMP_EQ || reg->len != priv->len) + if (priv->op != NFT_CMP_EQ || priv->len > reg->len) return -EOPNOTSUPP; - memcpy(key + reg->offset, &priv->data, priv->len); - memcpy(mask + reg->offset, ®->mask, priv->len); + memcpy(key + reg->offset, &priv->data, reg->len); + memcpy(mask + reg->offset, ®->mask, reg->len); flow->match.dissector.used_keys |= BIT(reg->key); flow->match.dissector.offset[reg->key] = reg->base_offset; @@ -137,7 +137,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, nft_reg_load16(priv->data.data) != ARPHRD_ETHER) return -EOPNOTSUPP; - nft_offload_update_dependency(ctx, &priv->data, priv->len); + nft_offload_update_dependency(ctx, &priv->data, reg->len); return 0; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index b37bd02448d8..bf4b3ad5314c 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -724,22 +724,22 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, switch (priv->key) { case NFT_META_PROTOCOL: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, - sizeof(__u16), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, + sizeof(__u16), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case NFT_META_L4PROTO: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, - sizeof(__u8), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, + sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); break; case NFT_META_IIF: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, - ingress_ifindex, sizeof(__u32), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, + ingress_ifindex, sizeof(__u32), reg); break; case NFT_META_IIFTYPE: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, - ingress_iftype, sizeof(__u16), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, + ingress_iftype, sizeof(__u16), reg); break; default: return -EOPNOTSUPP; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index bbf811d030d5..47d4e0e21651 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -165,6 +165,34 @@ nla_put_failure: return -1; } +static bool nft_payload_offload_mask(struct nft_offload_reg *reg, + u32 priv_len, u32 field_len) +{ + unsigned int remainder, delta, k; + struct nft_data mask = {}; + __be32 remainder_mask; + + if (priv_len == field_len) { + memset(®->mask, 0xff, priv_len); + return true; + } else if (priv_len > field_len) { + return false; + } + + memset(&mask, 0xff, field_len); + remainder = priv_len % sizeof(u32); + if (remainder) { + k = priv_len / sizeof(u32); + delta = field_len - priv_len; + remainder_mask = htonl(~((1 << (delta * BITS_PER_BYTE)) - 1)); + mask.data[k] = (__force u32)remainder_mask; + } + + memcpy(®->mask, &mask, field_len); + + return true; +} + static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_payload *priv) @@ -173,21 +201,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ethhdr, h_source): - if (priv->len != ETH_ALEN) + if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN)) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, src, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_dest): - if (priv->len != ETH_ALEN) + if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN)) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, dst, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_proto): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, @@ -195,14 +223,14 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case offsetof(struct vlan_ethhdr, h_vlan_TCI): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, vlan_tci, sizeof(__be16), reg); break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, @@ -210,7 +238,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case offsetof(struct vlan_ethhdr, h_vlan_TCI) + sizeof(struct vlan_hdr): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, @@ -218,7 +246,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) + sizeof(struct vlan_hdr): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, @@ -239,7 +267,8 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct iphdr, saddr): - if (priv->len != sizeof(struct in_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src, @@ -247,7 +276,8 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, daddr): - if (priv->len != sizeof(struct in_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst, @@ -255,7 +285,7 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, protocol): - if (priv->len != sizeof(__u8)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, @@ -277,7 +307,8 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ipv6hdr, saddr): - if (priv->len != sizeof(struct in6_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in6_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src, @@ -285,7 +316,8 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, daddr): - if (priv->len != sizeof(struct in6_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in6_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst, @@ -293,7 +325,7 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, nexthdr): - if (priv->len != sizeof(__u8)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, @@ -335,14 +367,14 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct tcphdr, source): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct tcphdr, dest): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, @@ -363,14 +395,14 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct udphdr, source): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct udphdr, dest): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, -- cgit v1.2.3 From 2867e1eac61016f59b3d730e3f7aa488e186e917 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Mon, 30 Nov 2020 19:37:05 +0100 Subject: inet_ecn: Fix endianness of checksum update when setting ECT(1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding support for propagating ECT(1) marking in IP headers it seems I suffered from endianness-confusion in the checksum update calculation: In fact the ECN field is in the *lower* bits of the first 16-bit word of the IP header when calculating in network byte order. This means that the addition performed to update the checksum field was wrong; let's fix that. Fixes: b723748750ec ("tunnel: Propagate ECT(1) when decapsulating as recommended by RFC6040") Reported-by: Jonathan Morton Tested-by: Pete Heist Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20201130183705.17540-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- include/net/inet_ecn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index e1eaf1780288..563457fec557 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -107,7 +107,7 @@ static inline int IP_ECN_set_ect1(struct iphdr *iph) if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; - check += (__force u16)htons(0x100); + check += (__force u16)htons(0x1); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos ^= INET_ECN_MASK; -- cgit v1.2.3 From d421e466c2373095f165ddd25cbabd6c5b077928 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 2 Dec 2020 20:39:46 -0800 Subject: net/mlx5: DR, Proper handling of unsupported Connect-X6DX SW steering STEs format for Connect-X5 and Connect-X6DX different. Currently, on Connext-X6DX the SW steering would break at some point when building STEs w/o giving a proper error message. Fix this by checking the STE format of the current device when initializing domain: add mlx5_ifc definitions for Connect-X6DX SW steering, read FW capability to get the current format version, and check this version when domain is being created. Fixes: 26d688e33f88 ("net/mlx5: DR, Add Steering entry (STE) utilities") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 1 + include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 6bd34b293007..51bbd88ff021 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -92,6 +92,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 890767a2a7cb..aa2c2d6c44e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -223,6 +223,11 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, if (ret) return ret; + if (dmn->info.caps.sw_format_ver != MLX5_STEERING_FORMAT_CONNECTX_5) { + mlx5dr_err(dmn, "SW steering is not supported on this device\n"); + return -EOPNOTSUPP; + } + ret = dr_domain_query_fdb_caps(mdev, dmn); if (ret) return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index f50f3b107aa3..cf62ea4f882e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -625,6 +625,7 @@ struct mlx5dr_cmd_caps { u8 max_ft_level; u16 roce_min_src_udp; u8 num_esw_ports; + u8 sw_format_ver; bool eswitch_manager; bool rx_sw_owner; bool tx_sw_owner; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a092346c7b2d..233352447b1a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1223,6 +1223,11 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) +enum { + MLX5_STEERING_FORMAT_CONNECTX_5 = 0, + MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@ -1521,7 +1526,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 general_obj_types[0x40]; - u8 reserved_at_440[0x20]; + u8 reserved_at_440[0x4]; + u8 steering_format_version[0x4]; + u8 create_qp_start_hint[0x18]; u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5]; -- cgit v1.2.3