summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTariq Toukan <tariqt@nvidia.com>2021-01-17 17:59:44 +0300
committerJakub Kicinski <kuba@kernel.org>2021-01-19 07:48:40 +0300
commit007feb87fb15933b5de7135e6bdf57c219b3fbec (patch)
treeada163a6bc36e2a0e78555fbd3f2b528163291df
parent5b99854540e35c2c6a226bcdb4bafbae1bccad5a (diff)
downloadlinux-007feb87fb15933b5de7135e6bdf57c219b3fbec.tar.xz
net/bonding: Implement ndo_sk_get_lower_dev
Add ndo_sk_get_lower_dev() implementation for bond interfaces. Support only for the cases where the socket's and SKBs' hash yields identical value for the whole connection lifetime. Here we restrict it to L3+4 sockets only, with xmit_hash_policy==LAYER34 and bond modes xor/802.3ad. Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Reviewed-by: Boris Pismenny <borisp@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/bonding/bond_main.c93
-rw-r--r--include/net/bonding.h2
2 files changed, 95 insertions, 0 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 759ad22b7279..09524f99c753 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -301,6 +301,19 @@ netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
return dev_queue_xmit(skb);
}
+bool bond_sk_check(struct bonding *bond)
+{
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
+ return true;
+ fallthrough;
+ default:
+ return false;
+ }
+}
+
/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -4555,6 +4568,85 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
return NULL;
}
+static void bond_sk_to_flow(struct sock *sk, struct flow_keys *flow)
+{
+ switch (sk->sk_family) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ if (sk->sk_ipv6only ||
+ ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) {
+ flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ flow->addrs.v6addrs.src = inet6_sk(sk)->saddr;
+ flow->addrs.v6addrs.dst = sk->sk_v6_daddr;
+ break;
+ }
+ fallthrough;
+#endif
+ default: /* AF_INET */
+ flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ flow->addrs.v4addrs.src = inet_sk(sk)->inet_rcv_saddr;
+ flow->addrs.v4addrs.dst = inet_sk(sk)->inet_daddr;
+ break;
+ }
+
+ flow->ports.src = inet_sk(sk)->inet_sport;
+ flow->ports.dst = inet_sk(sk)->inet_dport;
+}
+
+/**
+ * bond_sk_hash_l34 - generate a hash value based on the socket's L3 and L4 fields
+ * @sk: socket to use for headers
+ *
+ * This function will extract the necessary field from the socket and use
+ * them to generate a hash based on the LAYER34 xmit_policy.
+ * Assumes that sk is a TCP or UDP socket.
+ */
+static u32 bond_sk_hash_l34(struct sock *sk)
+{
+ struct flow_keys flow;
+ u32 hash;
+
+ bond_sk_to_flow(sk, &flow);
+
+ /* L4 */
+ memcpy(&hash, &flow.ports.ports, sizeof(hash));
+ /* L3 */
+ return bond_ip_hash(hash, &flow);
+}
+
+static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond,
+ struct sock *sk)
+{
+ struct bond_up_slave *slaves;
+ struct slave *slave;
+ unsigned int count;
+ u32 hash;
+
+ slaves = rcu_dereference(bond->usable_slaves);
+ count = slaves ? READ_ONCE(slaves->count) : 0;
+ if (unlikely(!count))
+ return NULL;
+
+ hash = bond_sk_hash_l34(sk);
+ slave = slaves->arr[hash % count];
+
+ return slave->dev;
+}
+
+static struct net_device *bond_sk_get_lower_dev(struct net_device *dev,
+ struct sock *sk)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct net_device *lower = NULL;
+
+ rcu_read_lock();
+ if (bond_sk_check(bond))
+ lower = __bond_sk_get_lower_dev(bond, sk);
+ rcu_read_unlock();
+
+ return lower;
+}
+
static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
@@ -4691,6 +4783,7 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_fix_features = bond_fix_features,
.ndo_features_check = passthru_features_check,
.ndo_get_xmit_slave = bond_xmit_get_slave,
+ .ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
};
static const struct device_type bond_type = {
diff --git a/include/net/bonding.h b/include/net/bonding.h
index adc3da776970..21497193c4a4 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -265,6 +265,8 @@ struct bond_vlan_tag {
unsigned short vlan_id;
};
+bool bond_sk_check(struct bonding *bond);
+
/**
* Returns NULL if the net_device does not belong to any of the bond's slaves
*