summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2022-10-28 12:47:42 +0300
committerDavid S. Miller <davem@davemloft.net>2022-10-28 12:47:42 +0300
commit957ed5e7129f2ce85dd76e4cdce749388295467d (patch)
treea6b1ae601b9d5ef8bfd0a520407754330b43c903 /include
parent7f86cf50cfa62d8028ba1fa9383c9645b3665e64 (diff)
parent71fc704768f601ed3fa36310822a5e03f310f781 (diff)
downloadlinux-957ed5e7129f2ce85dd76e4cdce749388295467d.tar.xz
Merge branch 'tcp-plb'
Mubashir Adnan Qureshi says: ==================== net: Add PLB functionality to TCP This patch series adds PLB (Protective Load Balancing) to TCP and hooks it up to DCTCP. PLB is disabled by default and can be enabled using relevant sysctls and support from underlying CC. PLB (Protective Load Balancing) is a host based mechanism for load balancing across switch links. It leverages congestion signals(e.g. ECN) from transport layer to randomly change the path of the connection experiencing congestion. PLB changes the path of the connection by changing the outgoing IPv6 flow label for IPv6 connections (implemented in Linux by calling sk_rethink_txhash()). Because of this implementation mechanism, PLB can currently only work for IPv6 traffic. For more information, see the SIGCOMM 2022 paper: https://doi.org/10.1145/3544216.3544226 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/tcp.h1
-rw-r--r--include/net/netns/ipv4.h5
-rw-r--r--include/net/tcp.h28
-rw-r--r--include/uapi/linux/snmp.h1
-rw-r--r--include/uapi/linux/tcp.h6
5 files changed, 41 insertions, 0 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 41b1da621a45..ca7f05a130d2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -423,6 +423,7 @@ struct tcp_sock {
u32 probe_seq_start;
u32 probe_seq_end;
} mtu_probe;
+ u32 plb_rehash; /* PLB-triggered rehash attempts */
u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
* while socket was owned by user.
*/
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1b8004679445..25f90bba4889 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -183,6 +183,11 @@ struct netns_ipv4 {
unsigned long tfo_active_disable_stamp;
u32 tcp_challenge_timestamp;
u32 tcp_challenge_count;
+ u8 sysctl_tcp_plb_enabled;
+ u8 sysctl_tcp_plb_idle_rehash_rounds;
+ u8 sysctl_tcp_plb_rehash_rounds;
+ u8 sysctl_tcp_plb_suspend_rto_sec;
+ int sysctl_tcp_plb_cong_thresh;
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 14d45661a84d..6b814e788f00 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2140,6 +2140,34 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);
+/* tcp_plb.c */
+
+/*
+ * Scaling factor for fractions in PLB. For example, tcp_plb_update_state
+ * expects cong_ratio which represents fraction of traffic that experienced
+ * congestion over a single RTT. In order to avoid floating point operations,
+ * this fraction should be mapped to (1 << TCP_PLB_SCALE) and passed in.
+ */
+#define TCP_PLB_SCALE 8
+
+/* State for PLB (Protective Load Balancing) for a single TCP connection. */
+struct tcp_plb_state {
+ u8 consec_cong_rounds:5, /* consecutive congested rounds */
+ unused:3;
+ u32 pause_until; /* jiffies32 when PLB can resume rerouting */
+};
+
+static inline void tcp_plb_init(const struct sock *sk,
+ struct tcp_plb_state *plb)
+{
+ plb->consec_cong_rounds = 0;
+ plb->pause_until = 0;
+}
+void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
+ const int cong_ratio);
+void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb);
+void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb);
+
/* At how many usecs into the future should the RTO fire? */
static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 4d7470036a8b..6600cb0164c2 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -292,6 +292,7 @@ enum
LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */
LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */
LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */
+ LINUX_MIB_TCPPLBREHASH, /* TCPPLBRehash */
__LINUX_MIB_MAX
};
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 8fc09e8638b3..879eeb0a084b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -284,6 +284,11 @@ struct tcp_info {
__u32 tcpi_snd_wnd; /* peer's advertised receive window after
* scaling (bytes)
*/
+ __u32 tcpi_rcv_wnd; /* local advertised receive window after
+ * scaling (bytes)
+ */
+
+ __u32 tcpi_rehash; /* PLB or timeout triggered rehash attempts */
};
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -315,6 +320,7 @@ enum {
TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */
TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */
TCP_NLA_TTL, /* TTL or hop limit of a packet received */
+ TCP_NLA_REHASH, /* PLB and timeout triggered rehash attempts */
};
/* for TCP_MD5SIG socket option */