summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.txt5
-rw-r--r--drivers/net/dsa/Kconfig6
-rw-r--r--drivers/net/dsa/mv88e6352.c31
-rw-r--r--drivers/net/dsa/mv88e6xxx.c42
-rw-r--r--drivers/net/dsa/mv88e6xxx.h8
-rw-r--r--drivers/net/ethernet/cadence/macb.c6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c54
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/l2t.c94
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/l2t.h18
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c89
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h18
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/sge.c12
-rw-r--r--drivers/net/ethernet/ec_bhf.c14
-rw-r--r--drivers/net/ethernet/emulex/benet/be.h2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c64
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h3
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c15
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c40
-rw-r--r--drivers/net/ethernet/hisilicon/hip04_eth.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hip04_mdio.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c20
-rw-r--r--drivers/net/ethernet/neterion/s2io.c26
-rw-r--r--drivers/net/ethernet/neterion/s2io.h2
-rw-r--r--drivers/net/ethernet/rocker/rocker.c61
-rw-r--r--drivers/net/ethernet/rocker/rocker.h1
-rw-r--r--drivers/net/ethernet/ti/netcp_core.c1
-rw-r--r--drivers/net/hyperv/hyperv_net.h1
-rw-r--r--drivers/net/hyperv/netvsc_drv.c3
-rw-r--r--drivers/net/hyperv/rndis_filter.c2
-rw-r--r--drivers/net/ifb.c207
-rw-r--r--drivers/net/phy/marvell.c42
-rw-r--r--drivers/net/phy/phy.c3
-rw-r--r--drivers/net/xen-netback/common.h3
-rw-r--r--include/linux/ipv6.h2
-rw-r--r--include/linux/phy.h2
-rw-r--r--include/net/act_api.h15
-rw-r--r--include/net/inet_hashtables.h4
-rw-r--r--include/net/inet_timewait_sock.h8
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--include/net/sch_generic.h31
-rw-r--r--include/net/tc_act/tc_gact.h7
-rw-r--r--include/net/tc_act/tc_mirred.h2
-rw-r--r--include/net/tcp.h7
-rw-r--r--include/net/timewait_sock.h3
-rw-r--r--include/uapi/linux/if_bridge.h1
-rw-r--r--kernel/bpf/core.c6
-rw-r--r--lib/test_bpf.c6
-rw-r--r--net/bridge/br_mdb.c11
-rw-r--r--net/bridge/br_multicast.c5
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/timestamping.c6
-rw-r--r--net/ipv4/inet_hashtables.c38
-rw-r--r--net/ipv4/inet_timewait_sock.c55
-rw-r--r--net/ipv4/ip_fragment.c26
-rw-r--r--net/ipv4/ping.c3
-rw-r--r--net/ipv4/route.c3
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_cubic.c4
-rw-r--r--net/ipv4/tcp_highspeed.c2
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_hybla.c2
-rw-r--r--net/ipv4/tcp_illinois.c2
-rw-r--r--net/ipv4/tcp_input.c62
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_scalable.c2
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/tcp_vegas.c6
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv6/Kconfig11
-rw-r--r--net/ipv6/addrconf.c197
-rw-r--r--net/ipv6/af_inet6.c7
-rw-r--r--net/ipv6/datagram.c8
-rw-r--r--net/ipv6/exthdrs.c2
-rw-r--r--net/ipv6/inet6_hashtables.c9
-rw-r--r--net/ipv6/raw.c3
-rw-r--r--net/ipv6/sysctl_net_ipv6.c8
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/netfilter/xt_TPROXY.c6
-rw-r--r--net/sched/act_api.c44
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c3
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_gact.c44
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c58
-rw-r--r--net/sched/act_nat.c3
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_simple.c3
-rw-r--r--net/sched/act_skbedit.c3
-rw-r--r--net/sched/act_vlan.c3
-rw-r--r--samples/bpf/bpf_helpers.h25
-rw-r--r--samples/bpf/tracex1_kern.c2
-rw-r--r--samples/bpf/tracex2_kern.c6
-rw-r--r--samples/bpf/tracex3_kern.c4
-rw-r--r--samples/bpf/tracex4_kern.c6
-rw-r--r--samples/bpf/tracex5_kern.c6
105 files changed, 1042 insertions, 698 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 5fae7704daab..f63aeefd2c24 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1435,6 +1435,11 @@ mtu - INTEGER
Default Maximum Transfer Unit
Default: 1280 (IPv6 required minimum)
+ip_nonlocal_bind - BOOLEAN
+ If set, allows processes to bind() to non-local IPv6 addresses,
+ which can be quite useful - but may break some applications.
+ Default: 0
+
router_probe_interval - INTEGER
Minimum interval (in seconds) between Router Probing described
in RFC4191.
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 7ad0a4d8e475..4c483d937481 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -46,13 +46,13 @@ config NET_DSA_MV88E6171
ethernet switches chips.
config NET_DSA_MV88E6352
- tristate "Marvell 88E6172/88E6176/88E6352 ethernet switch chip support"
+ tristate "Marvell 88E6172/6176/6320/6321/6352 ethernet switch chip support"
depends on NET_DSA
select NET_DSA_MV88E6XXX
select NET_DSA_TAG_EDSA
---help---
- This enables support for the Marvell 88E6172, 88E6176 and 88E6352
- ethernet switch chips.
+ This enables support for the Marvell 88E6172, 88E6176, 88E6320,
+ 88E6321 and 88E6352 ethernet switch chips.
config NET_DSA_BCM_SF2
tristate "Broadcom Starfighter 2 Ethernet switch support"
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 632815c10a40..cfece5ae9d5f 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -36,6 +36,18 @@ static char *mv88e6352_probe(struct device *host_dev, int sw_addr)
return "Marvell 88E6172";
if ((ret & 0xfff0) == PORT_SWITCH_ID_6176)
return "Marvell 88E6176";
+ if (ret == PORT_SWITCH_ID_6320_A1)
+ return "Marvell 88E6320 (A1)";
+ if (ret == PORT_SWITCH_ID_6320_A2)
+ return "Marvell 88e6320 (A2)";
+ if ((ret & 0xfff0) == PORT_SWITCH_ID_6320)
+ return "Marvell 88E6320";
+ if (ret == PORT_SWITCH_ID_6321_A1)
+ return "Marvell 88E6321 (A1)";
+ if (ret == PORT_SWITCH_ID_6321_A2)
+ return "Marvell 88e6321 (A2)";
+ if ((ret & 0xfff0) == PORT_SWITCH_ID_6321)
+ return "Marvell 88E6321";
if (ret == PORT_SWITCH_ID_6352_A0)
return "Marvell 88E6352 (A0)";
if (ret == PORT_SWITCH_ID_6352_A1)
@@ -84,11 +96,12 @@ static int mv88e6352_setup_global(struct dsa_switch *ds)
static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp)
{
+ int phy = mv88e6xxx_6320_family(ds) ? 3 : 0;
int ret;
*temp = 0;
- ret = mv88e6xxx_phy_page_read(ds, 0, 6, 27);
+ ret = mv88e6xxx_phy_page_read(ds, phy, 6, 27);
if (ret < 0)
return ret;
@@ -99,11 +112,12 @@ static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp)
static int mv88e6352_get_temp_limit(struct dsa_switch *ds, int *temp)
{
+ int phy = mv88e6xxx_6320_family(ds) ? 3 : 0;
int ret;
*temp = 0;
- ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26);
+ ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26);
if (ret < 0)
return ret;
@@ -114,23 +128,25 @@ static int mv88e6352_get_temp_limit(struct dsa_switch *ds, int *temp)
static int mv88e6352_set_temp_limit(struct dsa_switch *ds, int temp)
{
+ int phy = mv88e6xxx_6320_family(ds) ? 3 : 0;
int ret;
- ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26);
+ ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26);
if (ret < 0)
return ret;
temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f);
- return mv88e6xxx_phy_page_write(ds, 0, 6, 26,
+ return mv88e6xxx_phy_page_write(ds, phy, 6, 26,
(ret & 0xe0ff) | (temp << 8));
}
static int mv88e6352_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
{
+ int phy = mv88e6xxx_6320_family(ds) ? 3 : 0;
int ret;
*alarm = false;
- ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26);
+ ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26);
if (ret < 0)
return ret;
@@ -394,5 +410,8 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.fdb_getnext = mv88e6xxx_port_fdb_getnext,
};
-MODULE_ALIAS("platform:mv88e6352");
MODULE_ALIAS("platform:mv88e6172");
+MODULE_ALIAS("platform:mv88e6176");
+MODULE_ALIAS("platform:mv88e6320");
+MODULE_ALIAS("platform:mv88e6321");
+MODULE_ALIAS("platform:mv88e6352");
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index fd8547c2b79d..f394e4d4d9e0 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -517,6 +517,18 @@ static bool mv88e6xxx_6185_family(struct dsa_switch *ds)
return false;
}
+bool mv88e6xxx_6320_family(struct dsa_switch *ds)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+
+ switch (ps->id) {
+ case PORT_SWITCH_ID_6320:
+ case PORT_SWITCH_ID_6321:
+ return true;
+ }
+ return false;
+}
+
static bool mv88e6xxx_6351_family(struct dsa_switch *ds)
{
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -565,7 +577,7 @@ static int _mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port)
{
int ret;
- if (mv88e6xxx_6352_family(ds))
+ if (mv88e6xxx_6320_family(ds) || mv88e6xxx_6352_family(ds))
port = (port + 1) << 5;
/* Snapshot the hardware statistics counters for this port. */
@@ -1377,7 +1389,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) ||
- mv88e6xxx_6065_family(ds)) {
+ mv88e6xxx_6065_family(ds) || mv88e6xxx_6320_family(ds)) {
/* MAC Forcing register: don't force link, speed,
* duplex or flow control state to any particular
* values on physical ports, but force the CPU port
@@ -1423,7 +1435,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) ||
- mv88e6xxx_6185_family(ds))
+ mv88e6xxx_6185_family(ds) || mv88e6xxx_6320_family(ds))
reg = PORT_CONTROL_IGMP_MLD_SNOOP |
PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP |
PORT_CONTROL_STATE_FORWARDING;
@@ -1431,7 +1443,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds))
reg |= PORT_CONTROL_DSA_TAG;
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
- mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) {
+ mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
+ mv88e6xxx_6320_family(ds)) {
if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA)
reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA;
else
@@ -1441,14 +1454,15 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) ||
- mv88e6xxx_6185_family(ds)) {
+ mv88e6xxx_6185_family(ds) || mv88e6xxx_6320_family(ds)) {
if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA)
reg |= PORT_CONTROL_EGRESS_ADD_TAG;
}
}
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
- mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds)) {
+ mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) ||
+ mv88e6xxx_6320_family(ds)) {
if (ds->dsa_port_mask & (1 << port))
reg |= PORT_CONTROL_FRAME_MODE_DSA;
if (port == dsa_upstream_port(ds))
@@ -1473,11 +1487,11 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
reg = 0;
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
- mv88e6xxx_6095_family(ds))
+ mv88e6xxx_6095_family(ds) || mv88e6xxx_6320_family(ds))
reg = PORT_CONTROL_2_MAP_DA;
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
- mv88e6xxx_6165_family(ds))
+ mv88e6xxx_6165_family(ds) || mv88e6xxx_6320_family(ds))
reg |= PORT_CONTROL_2_JUMBO_10240;
if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) {
@@ -1514,7 +1528,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
goto abort;
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
- mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) {
+ mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
+ mv88e6xxx_6320_family(ds)) {
/* Do not limit the period of time that this port can
* be paused for by the remote end or the period of
* time that this port can pause the remote end.
@@ -1564,7 +1579,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
- mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) {
+ mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) ||
+ mv88e6xxx_6320_family(ds)) {
/* Rate Control: disable ingress rate limiting. */
ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
PORT_RATE_CONTROL, 0x0001);
@@ -1976,7 +1992,8 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
(i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT));
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
- mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) {
+ mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
+ mv88e6xxx_6320_family(ds)) {
/* Send all frames with destination addresses matching
* 01:80:c2:00:00:2x to the CPU port.
*/
@@ -1995,7 +2012,8 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
- mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) {
+ mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) ||
+ mv88e6xxx_6320_family(ds)) {
/* Disable ingress rate limiting by resetting all
* ingress rate limit registers to their initial
* state.
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index a650b2656de9..64786cb89a93 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -89,7 +89,12 @@
#define PORT_SWITCH_ID_6182 0x1a60
#define PORT_SWITCH_ID_6185 0x1a70
#define PORT_SWITCH_ID_6240 0x2400
-#define PORT_SWITCH_ID_6320 0x1250
+#define PORT_SWITCH_ID_6320 0x1150
+#define PORT_SWITCH_ID_6320_A1 0x1151
+#define PORT_SWITCH_ID_6320_A2 0x1152
+#define PORT_SWITCH_ID_6321 0x3100
+#define PORT_SWITCH_ID_6321_A1 0x3101
+#define PORT_SWITCH_ID_6321_A2 0x3102
#define PORT_SWITCH_ID_6350 0x3710
#define PORT_SWITCH_ID_6351 0x3750
#define PORT_SWITCH_ID_6352 0x3520
@@ -410,6 +415,7 @@ int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port,
int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg);
int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page,
int reg, int val);
+bool mv88e6xxx_6320_family(struct dsa_switch *ds);
extern struct dsa_switch_driver mv88e6131_switch_driver;
extern struct dsa_switch_driver mv88e6123_61_65_switch_driver;
extern struct dsa_switch_driver mv88e6352_switch_driver;
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index caeb39561567..a4e3f8655cb8 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2741,8 +2741,7 @@ static const struct macb_config emac_config = {
static const struct macb_config zynqmp_config = {
- .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE |
- MACB_CAPS_JUMBO,
+ .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
.init = macb_init,
@@ -2750,8 +2749,7 @@ static const struct macb_config zynqmp_config = {
};
static const struct macb_config zynq_config = {
- .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE |
- MACB_CAPS_NO_GIGABIT_HALF,
+ .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
.init = macb_init,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index a11485fbb33f..b135d05c9984 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -151,6 +151,45 @@ static int cim_la_show_3in1(struct seq_file *seq, void *v, int idx)
return 0;
}
+static int cim_la_show_t6(struct seq_file *seq, void *v, int idx)
+{
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "Status Inst Data PC LS0Stat "
+ "LS0Addr LS0Data LS1Stat LS1Addr LS1Data\n");
+ } else {
+ const u32 *p = v;
+
+ seq_printf(seq, " %02x %04x%04x %04x%04x %04x%04x %08x %08x %08x %08x %08x %08x\n",
+ (p[9] >> 16) & 0xff, /* Status */
+ p[9] & 0xffff, p[8] >> 16, /* Inst */
+ p[8] & 0xffff, p[7] >> 16, /* Data */
+ p[7] & 0xffff, p[6] >> 16, /* PC */
+ p[2], p[1], p[0], /* LS0 Stat, Addr and Data */
+ p[5], p[4], p[3]); /* LS1 Stat, Addr and Data */
+ }
+ return 0;
+}
+
+static int cim_la_show_pc_t6(struct seq_file *seq, void *v, int idx)
+{
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "Status Inst Data PC\n");
+ } else {
+ const u32 *p = v;
+
+ seq_printf(seq, " %02x %08x %08x %08x\n",
+ p[3] & 0xff, p[2], p[1], p[0]);
+ seq_printf(seq, " %02x %02x%06x %02x%06x %02x%06x\n",
+ (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
+ p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
+ seq_printf(seq, " %02x %04x%04x %04x%04x %04x%04x\n",
+ (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
+ p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
+ p[6] >> 16);
+ }
+ return 0;
+}
+
static int cim_la_open(struct inode *inode, struct file *file)
{
int ret;
@@ -162,9 +201,18 @@ static int cim_la_open(struct inode *inode, struct file *file)
if (ret)
return ret;
- p = seq_open_tab(file, adap->params.cim_la_size / 8, 8 * sizeof(u32), 1,
- cfg & UPDBGLACAPTPCONLY_F ?
- cim_la_show_3in1 : cim_la_show);
+ if (is_t6(adap->params.chip)) {
+ /* +1 to account for integer division of CIMLA_SIZE/10 */
+ p = seq_open_tab(file, (adap->params.cim_la_size / 10) + 1,
+ 10 * sizeof(u32), 1,
+ cfg & UPDBGLACAPTPCONLY_F ?
+ cim_la_show_pc_t6 : cim_la_show_t6);
+ } else {
+ p = seq_open_tab(file, adap->params.cim_la_size / 8,
+ 8 * sizeof(u32), 1,
+ cfg & UPDBGLACAPTPCONLY_F ? cim_la_show_3in1 :
+ cim_la_show);
+ }
if (!p)
return -ENOMEM;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 351f3b1bf800..d582e175dfb6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4757,7 +4757,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
cfg_queues(adapter);
- adapter->l2t = t4_init_l2t();
+ adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end);
if (!adapter->l2t) {
/* We tolerate a lack of L2T, giving up some functionality */
dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 252efc29321f..ac27898c6ab0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -51,24 +51,17 @@
#define VLAN_NONE 0xfff
/* identifies sync vs async L2T_WRITE_REQs */
-#define F_SYNC_WR (1 << 12)
-
-enum {
- L2T_STATE_VALID, /* entry is up to date */
- L2T_STATE_STALE, /* entry may be used but needs revalidation */
- L2T_STATE_RESOLVING, /* entry needs address resolution */
- L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */
-
- /* when state is one of the below the entry is not hashed */
- L2T_STATE_SWITCHING, /* entry is being used by a switching filter */
- L2T_STATE_UNUSED /* entry not in use */
-};
+#define SYNC_WR_S 12
+#define SYNC_WR_V(x) ((x) << SYNC_WR_S)
+#define SYNC_WR_F SYNC_WR_V(1)
struct l2t_data {
+ unsigned int l2t_start; /* start index of our piece of the L2T */
+ unsigned int l2t_size; /* number of entries in l2tab */
rwlock_t lock;
atomic_t nfree; /* number of free entries */
struct l2t_entry *rover; /* starting point for next allocation */
- struct l2t_entry l2tab[L2T_SIZE];
+ struct l2t_entry l2tab[0]; /* MUST BE LAST */
};
static inline unsigned int vlan_prio(const struct l2t_entry *e)
@@ -85,29 +78,36 @@ static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e)
/*
* To avoid having to check address families we do not allow v4 and v6
* neighbors to be on the same hash chain. We keep v4 entries in the first
- * half of available hash buckets and v6 in the second.
+ * half of available hash buckets and v6 in the second. We need at least two
+ * entries in our L2T for this scheme to work.
*/
enum {
- L2T_SZ_HALF = L2T_SIZE / 2,
- L2T_HASH_MASK = L2T_SZ_HALF - 1
+ L2T_MIN_HASH_BUCKETS = 2,
};
-static inline unsigned int arp_hash(const u32 *key, int ifindex)
+static inline unsigned int arp_hash(struct l2t_data *d, const u32 *key,
+ int ifindex)
{
- return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK;
+ unsigned int l2t_size_half = d->l2t_size / 2;
+
+ return jhash_2words(*key, ifindex, 0) % l2t_size_half;
}
-static inline unsigned int ipv6_hash(const u32 *key, int ifindex)
+static inline unsigned int ipv6_hash(struct l2t_data *d, const u32 *key,
+ int ifindex)
{
+ unsigned int l2t_size_half = d->l2t_size / 2;
u32 xor = key[0] ^ key[1] ^ key[2] ^ key[3];
- return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK);
+ return (l2t_size_half +
+ (jhash_2words(xor, ifindex, 0) % l2t_size_half));
}
-static unsigned int addr_hash(const u32 *addr, int addr_len, int ifindex)
+static unsigned int addr_hash(struct l2t_data *d, const u32 *addr,
+ int addr_len, int ifindex)
{
- return addr_len == 4 ? arp_hash(addr, ifindex) :
- ipv6_hash(addr, ifindex);
+ return addr_len == 4 ? arp_hash(d, addr, ifindex) :
+ ipv6_hash(d, addr, ifindex);
}
/*
@@ -139,6 +139,8 @@ static void neigh_replace(struct l2t_entry *e, struct neighbour *n)
*/
static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync)
{
+ struct l2t_data *d = adap->l2t;
+ unsigned int l2t_idx = e->idx + d->l2t_start;
struct sk_buff *skb;
struct cpl_l2t_write_req *req;
@@ -150,10 +152,10 @@ static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync)
INIT_TP_WR(req, 0);
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ,
- e->idx | (sync ? F_SYNC_WR : 0) |
+ l2t_idx | (sync ? SYNC_WR_F : 0) |
TID_QID_V(adap->sge.fw_evtq.abs_id)));
req->params = htons(L2T_W_PORT_V(e->lport) | L2T_W_NOREPLY_V(!sync));
- req->l2t_idx = htons(e->idx);
+ req->l2t_idx = htons(l2t_idx);
req->vlan = htons(e->vlan);
if (e->neigh && !(e->neigh->dev->flags & IFF_LOOPBACK))
memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac));
@@ -190,18 +192,19 @@ static void send_pending(struct adapter *adap, struct l2t_entry *e)
*/
void do_l2t_write_rpl(struct adapter *adap, const struct cpl_l2t_write_rpl *rpl)
{
+ struct l2t_data *d = adap->l2t;
unsigned int tid = GET_TID(rpl);
- unsigned int idx = tid & (L2T_SIZE - 1);
+ unsigned int l2t_idx = tid % L2T_SIZE;
if (unlikely(rpl->status != CPL_ERR_NONE)) {
dev_err(adap->pdev_dev,
"Unexpected L2T_WRITE_RPL status %u for entry %u\n",
- rpl->status, idx);
+ rpl->status, l2t_idx);
return;
}
- if (tid & F_SYNC_WR) {
- struct l2t_entry *e = &adap->l2t->l2tab[idx];
+ if (tid & SYNC_WR_F) {
+ struct l2t_entry *e = &d->l2tab[l2t_idx - d->l2t_start];
spin_lock(&e->lock);
if (e->state != L2T_STATE_SWITCHING) {
@@ -276,7 +279,7 @@ static struct l2t_entry *alloc_l2e(struct l2t_data *d)
return NULL;
/* there's definitely a free entry */
- for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
+ for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e)
if (atomic_read(&e->refcnt) == 0)
goto found;
@@ -368,7 +371,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh,
int addr_len = neigh->tbl->key_len;
u32 *addr = (u32 *)neigh->primary_key;
int ifidx = neigh->dev->ifindex;
- int hash = addr_hash(addr, addr_len, ifidx);
+ int hash = addr_hash(d, addr, addr_len, ifidx);
if (neigh->dev->flags & IFF_LOOPBACK)
lport = netdev2pinfo(physdev)->tx_chan + 4;
@@ -481,7 +484,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh)
int addr_len = neigh->tbl->key_len;
u32 *addr = (u32 *) neigh->primary_key;
int ifidx = neigh->dev->ifindex;
- int hash = addr_hash(addr, addr_len, ifidx);
+ int hash = addr_hash(d, addr, addr_len, ifidx);
read_lock_bh(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next)
@@ -554,20 +557,30 @@ int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan,
return write_l2e(adap, e, 0);
}
-struct l2t_data *t4_init_l2t(void)
+struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end)
{
+ unsigned int l2t_size;
int i;
struct l2t_data *d;
- d = t4_alloc_mem(sizeof(*d));
+ if (l2t_start >= l2t_end || l2t_end >= L2T_SIZE)
+ return NULL;
+ l2t_size = l2t_end - l2t_start + 1;
+ if (l2t_size < L2T_MIN_HASH_BUCKETS)
+ return NULL;
+
+ d = t4_alloc_mem(sizeof(*d) + l2t_size * sizeof(struct l2t_entry));
if (!d)
return NULL;
+ d->l2t_start = l2t_start;
+ d->l2t_size = l2t_size;
+
d->rover = d->l2tab;
- atomic_set(&d->nfree, L2T_SIZE);
+ atomic_set(&d->nfree, l2t_size);
rwlock_init(&d->lock);
- for (i = 0; i < L2T_SIZE; ++i) {
+ for (i = 0; i < d->l2t_size; ++i) {
d->l2tab[i].idx = i;
d->l2tab[i].state = L2T_STATE_UNUSED;
spin_lock_init(&d->l2tab[i].lock);
@@ -578,9 +591,9 @@ struct l2t_data *t4_init_l2t(void)
static inline void *l2t_get_idx(struct seq_file *seq, loff_t pos)
{
- struct l2t_entry *l2tab = seq->private;
+ struct l2t_data *d = seq->private;
- return pos >= L2T_SIZE ? NULL : &l2tab[pos];
+ return pos >= d->l2t_size ? NULL : &d->l2tab[pos];
}
static void *l2t_seq_start(struct seq_file *seq, loff_t *pos)
@@ -620,6 +633,7 @@ static int l2t_seq_show(struct seq_file *seq, void *v)
"Ethernet address VLAN/P LP State Users Port\n");
else {
char ip[60];
+ struct l2t_data *d = seq->private;
struct l2t_entry *e = v;
spin_lock_bh(&e->lock);
@@ -628,7 +642,7 @@ static int l2t_seq_show(struct seq_file *seq, void *v)
else
sprintf(ip, e->v6 ? "%pI6c" : "%pI4", e->addr);
seq_printf(seq, "%4u %-25s %17pM %4d %u %2u %c %5u %s\n",
- e->idx, ip, e->dmac,
+ e->idx + d->l2t_start, ip, e->dmac,
e->vlan & VLAN_VID_MASK, vlan_prio(e), e->lport,
l2e_state(e), atomic_read(&e->refcnt),
e->neigh ? e->neigh->dev->name : "");
@@ -652,7 +666,7 @@ static int l2t_seq_open(struct inode *inode, struct file *file)
struct adapter *adap = inode->i_private;
struct seq_file *seq = file->private_data;
- seq->private = adap->l2t->l2tab;
+ seq->private = adap->l2t;
}
return rc;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h
index a30126ce90cb..b38dc526aad5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h
@@ -39,6 +39,20 @@
#include <linux/if_ether.h>
#include <linux/atomic.h>
+enum { L2T_SIZE = 4096 }; /* # of L2T entries */
+
+enum {
+ L2T_STATE_VALID, /* entry is up to date */
+ L2T_STATE_STALE, /* entry may be used but needs revalidation */
+ L2T_STATE_RESOLVING, /* entry needs address resolution */
+ L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */
+ L2T_STATE_NOARP, /* Netdev down or removed*/
+
+ /* when state is one of the below the entry is not hashed */
+ L2T_STATE_SWITCHING, /* entry is being used by a switching filter */
+ L2T_STATE_UNUSED /* entry not in use */
+};
+
struct adapter;
struct l2t_data;
struct neighbour;
@@ -56,7 +70,7 @@ struct cpl_l2t_write_rpl;
*/
struct l2t_entry {
u16 state; /* entry state */
- u16 idx; /* entry index */
+ u16 idx; /* entry index within in-memory table */
u32 addr[4]; /* next hop IP or IPv6 address */
int ifindex; /* neighbor's net_device's ifindex */
struct neighbour *neigh; /* associated neighbour */
@@ -104,7 +118,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh);
struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d);
int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan,
u8 port, u8 *eth_addr);
-struct l2t_data *t4_init_l2t(void);
+struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end);
void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl);
extern const struct file_operations t4_l2t_fops;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 2b52aae7ec86..1e6597dc8736 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -1345,9 +1345,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x5a80, 0x5a9c,
0x5b94, 0x5bfc,
0x5c10, 0x5ec0,
- 0x5ec8, 0x5ec8,
+ 0x5ec8, 0x5ecc,
0x6000, 0x6040,
- 0x6058, 0x6154,
+ 0x6058, 0x615c,
0x7700, 0x7798,
0x77c0, 0x7880,
0x78cc, 0x78fc,
@@ -1371,20 +1371,22 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x9f00, 0x9f6c,
0x9f80, 0xa020,
0xd004, 0xd03c,
+ 0xd100, 0xd118,
+ 0xd200, 0xd31c,
0xdfc0, 0xdfe0,
0xe000, 0xf008,
0x11000, 0x11014,
0x11048, 0x11110,
0x11118, 0x1117c,
- 0x11190, 0x11260,
+ 0x11190, 0x11264,
0x11300, 0x1130c,
- 0x12000, 0x1205c,
+ 0x12000, 0x1206c,
0x19040, 0x1906c,
0x19078, 0x19080,
0x1908c, 0x19124,
0x19150, 0x191b0,
0x191d0, 0x191e8,
- 0x19238, 0x192b8,
+ 0x19238, 0x192bc,
0x193f8, 0x19474,
0x19490, 0x194cc,
0x194f0, 0x194f8,
@@ -1466,7 +1468,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x30200, 0x30318,
0x30400, 0x3052c,
0x30540, 0x3061c,
- 0x30800, 0x3088c,
+ 0x30800, 0x30890,
0x308c0, 0x30908,
0x30910, 0x309b8,
0x30a00, 0x30a04,
@@ -1544,7 +1546,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x34200, 0x34318,
0x34400, 0x3452c,
0x34540, 0x3461c,
- 0x34800, 0x3488c,
+ 0x34800, 0x34890,
0x348c0, 0x34908,
0x34910, 0x349b8,
0x34a00, 0x34a04,
@@ -3924,43 +3926,25 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
*/
void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st)
{
- /* T6 and later has 2 channels */
- if (adap->params.arch.nchan == NCHAN) {
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->mac_in_errs, 12, TP_MIB_MAC_IN_ERR_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tnl_cong_drops, 8,
- TP_MIB_TNL_CNG_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tnl_tx_drops, 4,
- TP_MIB_TNL_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->ofld_vlan_drops, 4,
- TP_MIB_OFD_VLN_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tcp6_in_errs, 4,
- TP_MIB_TCP_V6IN_ERR_0_A);
- } else {
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->mac_in_errs, 2, TP_MIB_MAC_IN_ERR_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->hdr_in_errs, 2, TP_MIB_HDR_IN_ERR_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tcp_in_errs, 2, TP_MIB_TCP_IN_ERR_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tnl_cong_drops, 2,
- TP_MIB_TNL_CNG_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->ofld_chan_drops, 2,
- TP_MIB_OFD_CHN_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tnl_tx_drops, 2, TP_MIB_TNL_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->ofld_vlan_drops, 2,
- TP_MIB_OFD_VLN_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tcp6_in_errs, 2, TP_MIB_TCP_V6IN_ERR_0_A);
- }
+ int nchan = adap->params.arch.nchan;
+
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+ st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A);
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+ st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A);
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+ st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A);
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+ st->tnl_cong_drops, nchan, TP_MIB_TNL_CNG_DROP_0_A);
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+ st->ofld_chan_drops, nchan, TP_MIB_OFD_CHN_DROP_0_A);
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+ st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A);
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+ st->ofld_vlan_drops, nchan, TP_MIB_OFD_VLN_DROP_0_A);
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+ st->tcp6_in_errs, nchan, TP_MIB_TCP_V6IN_ERR_0_A);
+
t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
&st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A);
}
@@ -3974,16 +3958,13 @@ void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st)
*/
void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st)
{
- /* T6 and later has 2 channels */
- if (adap->params.arch.nchan == NCHAN) {
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req,
- 8, TP_MIB_CPL_IN_REQ_0_A);
- } else {
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req,
- 2, TP_MIB_CPL_IN_REQ_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp,
- 2, TP_MIB_CPL_OUT_RSP_0_A);
- }
+ int nchan = adap->params.arch.nchan;
+
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req,
+ nchan, TP_MIB_CPL_IN_REQ_0_A);
+ t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp,
+ nchan, TP_MIB_CPL_OUT_RSP_0_A);
+
}
/**
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index c8488f430d19..640369df8b3a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
@@ -47,7 +47,6 @@ enum {
TCB_SIZE = 128, /* TCB size */
NMTUS = 16, /* size of MTU table */
NCCTRL_WIN = 32, /* # of congestion control windows */
- L2T_SIZE = 4096, /* # of L2T entries */
PM_NSTATS = 5, /* # of PM stats */
MBOX_LEN = 64, /* mailbox size in bytes */
TRACE_LEN = 112, /* length of trace data and mask */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index d7ca106927b0..8353a6cbfcc2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -142,6 +142,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
CH_PCI_ID_TABLE_FENTRY(0x5013), /* T580-chr */
CH_PCI_ID_TABLE_FENTRY(0x5014), /* T580-so */
CH_PCI_ID_TABLE_FENTRY(0x5015), /* T502-bt */
+ CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */
+ CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */
CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */
CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */
CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */
@@ -155,6 +157,22 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
CH_PCI_ID_TABLE_FENTRY(0x5090), /* Custom T540-CR */
CH_PCI_ID_TABLE_FENTRY(0x5091), /* Custom T522-CR */
CH_PCI_ID_TABLE_FENTRY(0x5092), /* Custom T520-CR */
+
+ /* T6 adapters:
+ */
+ CH_PCI_ID_TABLE_FENTRY(0x6001),
+ CH_PCI_ID_TABLE_FENTRY(0x6002),
+ CH_PCI_ID_TABLE_FENTRY(0x6003),
+ CH_PCI_ID_TABLE_FENTRY(0x6004),
+ CH_PCI_ID_TABLE_FENTRY(0x6005),
+ CH_PCI_ID_TABLE_FENTRY(0x6006),
+ CH_PCI_ID_TABLE_FENTRY(0x6007),
+ CH_PCI_ID_TABLE_FENTRY(0x6009),
+ CH_PCI_ID_TABLE_FENTRY(0x600d),
+ CH_PCI_ID_TABLE_FENTRY(0x6010),
+ CH_PCI_ID_TABLE_FENTRY(0x6011),
+ CH_PCI_ID_TABLE_FENTRY(0x6014),
+ CH_PCI_ID_TABLE_FENTRY(0x6015),
CH_PCI_DEVICE_ID_TABLE_DEFINE_END;
#endif /* __T4_PCI_ID_TBL_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index ad53e5ad2acd..1d5e77a566e1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -1898,7 +1898,10 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
rspq->unhandled_irqs++;
val = CIDXINC_V(work_done) | SEINTARM_V(intr_params);
- if (is_t4(rspq->adapter->params.chip)) {
+ /* If we don't have access to the new User GTS (T5+), use the old
+ * doorbell mechanism; otherwise use the new BAR2 mechanism.
+ */
+ if (unlikely(!rspq->bar2_addr)) {
t4_write_reg(rspq->adapter,
T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
val | INGRESSQID_V((u32)rspq->cntxt_id));
@@ -1998,10 +2001,13 @@ static unsigned int process_intrq(struct adapter *adapter)
}
val = CIDXINC_V(work_done) | SEINTARM_V(intrq->intr_params);
- if (is_t4(adapter->params.chip))
+ /* If we don't have access to the new User GTS (T5+), use the old
+ * doorbell mechanism; otherwise use the new BAR2 mechanism.
+ */
+ if (unlikely(!intrq->bar2_addr)) {
t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
val | INGRESSQID_V(intrq->cntxt_id));
- else {
+ } else {
writel(val | INGRESSQID_V(intrq->bar2_qid),
intrq->bar2_addr + SGE_UDB_GTS);
wmb();
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index d1017509b08a..f7b42483921c 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -604,19 +604,7 @@ static struct pci_driver pci_driver = {
.probe = ec_bhf_probe,
.remove = ec_bhf_remove,
};
-
-static int __init ec_bhf_init(void)
-{
- return pci_register_driver(&pci_driver);
-}
-
-static void __exit ec_bhf_exit(void)
-{
- pci_unregister_driver(&pci_driver);
-}
-
-module_init(ec_bhf_init);
-module_exit(ec_bhf_exit);
+module_pci_driver(pci_driver);
module_param(polling_frequency, long, S_IRUGO);
MODULE_PARM_DESC(polling_frequency, "Polling timer frequency in ns");
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 8d12b41b3b19..cb5777bb7429 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -37,7 +37,7 @@
#include "be_hw.h"
#include "be_roce.h"
-#define DRV_VER "10.6.0.2"
+#define DRV_VER "10.6.0.3"
#define DRV_NAME "be2net"
#define BE_NAME "Emulex BladeEngine2"
#define BE3_NAME "Emulex BladeEngine3"
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 9eac3227d2ca..ecad46f79653 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -88,19 +88,21 @@ static inline void *embedded_payload(struct be_mcc_wrb *wrb)
return wrb->payload.embedded_payload;
}
-static void be_mcc_notify(struct be_adapter *adapter)
+static int be_mcc_notify(struct be_adapter *adapter)
{
struct be_queue_info *mccq = &adapter->mcc_obj.q;
u32 val = 0;
if (be_check_error(adapter, BE_ERROR_ANY))
- return;
+ return -EIO;
val |= mccq->id & DB_MCCQ_RING_ID_MASK;
val |= 1 << DB_MCCQ_NUM_POSTED_SHIFT;
wmb();
iowrite32(val, adapter->db + DB_MCCQ_OFFSET);
+
+ return 0;
}
/* To check if valid bit is set, check the entire word as we don't know
@@ -170,6 +172,12 @@ static void be_async_cmd_process(struct be_adapter *adapter,
return;
}
+ if (opcode == OPCODE_LOWLEVEL_SET_LOOPBACK_MODE &&
+ subsystem == CMD_SUBSYSTEM_LOWLEVEL) {
+ complete(&adapter->et_cmd_compl);
+ return;
+ }
+
if ((opcode == OPCODE_COMMON_WRITE_FLASHROM ||
opcode == OPCODE_COMMON_WRITE_OBJECT) &&
subsystem == CMD_SUBSYSTEM_COMMON) {
@@ -541,7 +549,9 @@ static int be_mcc_notify_wait(struct be_adapter *adapter)
resp = be_decode_resp_hdr(wrb->tag0, wrb->tag1);
- be_mcc_notify(adapter);
+ status = be_mcc_notify(adapter);
+ if (status)
+ goto out;
status = be_mcc_wait_compl(adapter);
if (status == -EIO)
@@ -1547,7 +1557,10 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd)
else
hdr->version = 2;
- be_mcc_notify(adapter);
+ status = be_mcc_notify(adapter);
+ if (status)
+ goto err;
+
adapter->stats_cmd_sent = true;
err:
@@ -1583,7 +1596,10 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter,
req->cmd_params.params.pport_num = cpu_to_le16(adapter->hba_port_num);
req->cmd_params.params.reset_stats = 0;
- be_mcc_notify(adapter);
+ status = be_mcc_notify(adapter);
+ if (status)
+ goto err;
+
adapter->stats_cmd_sent = true;
err:
@@ -1687,8 +1703,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter)
OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES,
sizeof(*req), wrb, NULL);
- be_mcc_notify(adapter);
-
+ status = be_mcc_notify(adapter);
err:
spin_unlock_bh(&adapter->mcc_lock);
return status;
@@ -1860,7 +1875,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter,
cpu_to_le32(set_eqd[i].delay_multiplier);
}
- be_mcc_notify(adapter);
+ status = be_mcc_notify(adapter);
err:
spin_unlock_bh(&adapter->mcc_lock);
return status;
@@ -1953,7 +1968,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value)
memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
}
- status = be_mcc_notify_wait(adapter);
+ status = be_mcc_notify(adapter);
err:
spin_unlock_bh(&adapter->mcc_lock);
return status;
@@ -2320,7 +2335,10 @@ int lancer_cmd_write_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
req->addr_high = cpu_to_le32(upper_32_bits(cmd->dma +
sizeof(struct lancer_cmd_req_write_object)));
- be_mcc_notify(adapter);
+ status = be_mcc_notify(adapter);
+ if (status)
+ goto err_unlock;
+
spin_unlock_bh(&adapter->mcc_lock);
if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
@@ -2491,7 +2509,10 @@ int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_dma_mem *cmd,
req->params.op_code = cpu_to_le32(flash_opcode);
req->params.data_buf_size = cpu_to_le32(buf_size);
- be_mcc_notify(adapter);
+ status = be_mcc_notify(adapter);
+ if (status)
+ goto err_unlock;
+
spin_unlock_bh(&adapter->mcc_lock);
if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
@@ -2585,7 +2606,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
wrb = wrb_from_mccq(adapter);
if (!wrb) {
status = -EBUSY;
- goto err;
+ goto err_unlock;
}
req = embedded_payload(wrb);
@@ -2599,8 +2620,19 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
req->loopback_type = loopback_type;
req->loopback_state = enable;
- status = be_mcc_notify_wait(adapter);
-err:
+ status = be_mcc_notify(adapter);
+ if (status)
+ goto err_unlock;
+
+ spin_unlock_bh(&adapter->mcc_lock);
+
+ if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
+ msecs_to_jiffies(SET_LB_MODE_TIMEOUT)))
+ status = -ETIMEDOUT;
+
+ return status;
+
+err_unlock:
spin_unlock_bh(&adapter->mcc_lock);
return status;
}
@@ -2636,7 +2668,9 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num,
req->num_pkts = cpu_to_le32(num_pkts);
req->loopback_type = cpu_to_le32(loopback_type);
- be_mcc_notify(adapter);
+ status = be_mcc_notify(adapter);
+ if (status)
+ goto err;
spin_unlock_bh(&adapter->mcc_lock);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 2716e6f30d9a..a4479f7488d3 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1495,6 +1495,8 @@ struct be_cmd_resp_acpi_wol_magic_config_v1 {
#define BE_PME_D3COLD_CAP 0x80
/********************** LoopBack test *********************/
+#define SET_LB_MODE_TIMEOUT 12000
+
struct be_cmd_req_loopback_test {
struct be_cmd_req_hdr hdr;
u32 loopback_type;
@@ -1758,6 +1760,7 @@ struct be_cmd_req_set_mac_list {
/*********************** HSW Config ***********************/
#define PORT_FWD_TYPE_VEPA 0x3
#define PORT_FWD_TYPE_VEB 0x2
+#define PORT_FWD_TYPE_PASSTHRU 0x1
#define ENABLE_MAC_SPOOFCHK 0x2
#define DISABLE_MAC_SPOOFCHK 0x3
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index b2476dbfd103..d20ff054c1f7 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -847,10 +847,21 @@ err:
static u64 be_loopback_test(struct be_adapter *adapter, u8 loopback_type,
u64 *status)
{
- be_cmd_set_loopback(adapter, adapter->hba_port_num, loopback_type, 1);
+ int ret;
+
+ ret = be_cmd_set_loopback(adapter, adapter->hba_port_num,
+ loopback_type, 1);
+ if (ret)
+ return ret;
+
*status = be_cmd_loopback_test(adapter, adapter->hba_port_num,
loopback_type, 1500, 2, 0xabc);
- be_cmd_set_loopback(adapter, adapter->hba_port_num, BE_NO_LOOPBACK, 1);
+
+ ret = be_cmd_set_loopback(adapter, adapter->hba_port_num,
+ BE_NO_LOOPBACK, 1);
+ if (ret)
+ return ret;
+
return *status;
}
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 6f642426308c..c996dd76f546 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1254,7 +1254,7 @@ static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
if (is_udp_pkt((*skb))) {
struct udphdr *udp = udp_hdr((*skb));
- switch (udp->dest) {
+ switch (ntohs(udp->dest)) {
case DHCP_CLIENT_PORT:
os2bmc = is_dhcp_client_filt_enabled(adapter);
goto done;
@@ -3529,15 +3529,15 @@ err:
static int be_setup_wol(struct be_adapter *adapter, bool enable)
{
+ struct device *dev = &adapter->pdev->dev;
struct be_dma_mem cmd;
- int status = 0;
u8 mac[ETH_ALEN];
+ int status;
eth_zero_addr(mac);
cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config);
- cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
- GFP_KERNEL);
+ cmd.va = dma_zalloc_coherent(dev, cmd.size, &cmd.dma, GFP_KERNEL);
if (!cmd.va)
return -ENOMEM;
@@ -3546,24 +3546,18 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable)
PCICFG_PM_CONTROL_OFFSET,
PCICFG_PM_CONTROL_MASK);
if (status) {
- dev_err(&adapter->pdev->dev,
- "Could not enable Wake-on-lan\n");
- dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
- cmd.dma);
- return status;
+ dev_err(dev, "Could not enable Wake-on-lan\n");
+ goto err;
}
- status = be_cmd_enable_magic_wol(adapter,
- adapter->netdev->dev_addr,
- &cmd);
- pci_enable_wake(adapter->pdev, PCI_D3hot, 1);
- pci_enable_wake(adapter->pdev, PCI_D3cold, 1);
} else {
- status = be_cmd_enable_magic_wol(adapter, mac, &cmd);
- pci_enable_wake(adapter->pdev, PCI_D3hot, 0);
- pci_enable_wake(adapter->pdev, PCI_D3cold, 0);
+ ether_addr_copy(mac, adapter->netdev->dev_addr);
}
- dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
+ status = be_cmd_enable_magic_wol(adapter, mac, &cmd);
+ pci_enable_wake(adapter->pdev, PCI_D3hot, enable);
+ pci_enable_wake(adapter->pdev, PCI_D3cold, enable);
+err:
+ dma_free_coherent(dev, cmd.size, cmd.va, cmd.dma);
return status;
}
@@ -4924,7 +4918,7 @@ static bool be_check_ufi_compatibility(struct be_adapter *adapter,
{
if (!fhdr) {
dev_err(&adapter->pdev->dev, "Invalid FW UFI file");
- return -1;
+ return false;
}
/* First letter of the build version is used to identify
@@ -5079,9 +5073,6 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
int status = 0;
u8 hsw_mode;
- if (!sriov_enabled(adapter))
- return 0;
-
/* BE and Lancer chips support VEB mode only */
if (BEx_chip(adapter) || lancer_chip(adapter)) {
hsw_mode = PORT_FWD_TYPE_VEB;
@@ -5091,6 +5082,9 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
NULL);
if (status)
return 0;
+
+ if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
+ return 0;
}
return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
@@ -5813,7 +5807,6 @@ static int be_pci_resume(struct pci_dev *pdev)
if (status)
return status;
- pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
status = be_resume(adapter);
@@ -5893,7 +5886,6 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
return PCI_ERS_RESULT_DISCONNECT;
pci_set_master(pdev);
- pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
/* Check if card is ok and fw is ready */
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index d49bee38cd31..cc2d8b4b18e3 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -965,7 +965,6 @@ static struct platform_driver hip04_mac_driver = {
.remove = hip04_remove,
.driver = {
.name = DRV_NAME,
- .owner = THIS_MODULE,
.of_match_table = hip04_mac_match,
},
};
diff --git a/drivers/net/ethernet/hisilicon/hip04_mdio.c b/drivers/net/ethernet/hisilicon/hip04_mdio.c
index b3bac25db99c..fca0a5be1f0f 100644
--- a/drivers/net/ethernet/hisilicon/hip04_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hip04_mdio.c
@@ -174,7 +174,6 @@ static struct platform_driver hip04_mdio_driver = {
.remove = hip04_mdio_remove,
.driver = {
.name = "hip04-mdio",
- .owner = THIS_MODULE,
.of_match_table = hip04_mdio_match,
},
};
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 7a4f20bb7fcb..12c65e1ad6a9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -917,7 +917,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
if (dev->features & NETIF_F_RXHASH)
skb_set_hash(gro_skb,
be32_to_cpu(cqe->immed_rss_invalid),
- PKT_HASH_TYPE_L3);
+ (ip_summed == CHECKSUM_UNNECESSARY) ?
+ PKT_HASH_TYPE_L4 :
+ PKT_HASH_TYPE_L3);
skb_record_rx_queue(gro_skb, cq->ring);
skb_mark_napi_id(gro_skb, &cq->napi);
@@ -963,7 +965,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
if (dev->features & NETIF_F_RXHASH)
skb_set_hash(skb,
be32_to_cpu(cqe->immed_rss_invalid),
- PKT_HASH_TYPE_L3);
+ (ip_summed == CHECKSUM_UNNECESSARY) ?
+ PKT_HASH_TYPE_L4 :
+ PKT_HASH_TYPE_L3);
if ((be32_to_cpu(cqe->vlan_my_qpn) &
MLX4_CQE_VLAN_PRESENT_MASK) &&
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 12fbfcb44d8a..d76f4257e305 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2907,6 +2907,8 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
{
u64 dev_flags = dev->flags;
int err = 0;
+ int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev),
+ MLX4_MAX_NUM_VF);
if (reset_flow) {
dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
@@ -2932,6 +2934,12 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
}
if (!(dev->flags & MLX4_FLAG_SRIOV)) {
+ if (total_vfs > fw_enabled_sriov_vfs) {
+ mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR_IOV\n",
+ total_vfs, fw_enabled_sriov_vfs);
+ err = -ENOMEM;
+ goto disable_sriov;
+ }
mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
err = pci_enable_sriov(pdev, total_vfs);
}
@@ -3413,20 +3421,20 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
goto err_disable_pdev;
}
}
- if (total_vfs >= MLX4_MAX_NUM_VF) {
+ if (total_vfs > MLX4_MAX_NUM_VF) {
dev_err(&pdev->dev,
- "Requested more VF's (%d) than allowed (%d)\n",
- total_vfs, MLX4_MAX_NUM_VF - 1);
+ "Requested more VF's (%d) than allowed by hw (%d)\n",
+ total_vfs, MLX4_MAX_NUM_VF);
err = -EINVAL;
goto err_disable_pdev;
}
for (i = 0; i < MLX4_MAX_PORTS; i++) {
- if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
+ if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) {
dev_err(&pdev->dev,
- "Requested more VF's (%d) for port (%d) than allowed (%d)\n",
+ "Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n",
nvfs[i] + nvfs[2], i + 1,
- MLX4_MAX_NUM_VF_P_PORT - 1);
+ MLX4_MAX_NUM_VF_P_PORT);
err = -EINVAL;
goto err_disable_pdev;
}
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index c28111749e1f..2d1b94274079 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -8226,31 +8226,7 @@ static void s2io_rem_nic(struct pci_dev *pdev)
pci_disable_device(pdev);
}
-/**
- * s2io_starter - Entry point for the driver
- * Description: This function is the entry point for the driver. It verifies
- * the module loadable parameters and initializes PCI configuration space.
- */
-
-static int __init s2io_starter(void)
-{
- return pci_register_driver(&s2io_driver);
-}
-
-/**
- * s2io_closer - Cleanup routine for the driver
- * Description: This function is the cleanup routine for the driver. It
- * unregisters the driver.
- */
-
-static __exit void s2io_closer(void)
-{
- pci_unregister_driver(&s2io_driver);
- DBG_PRINT(INIT_DBG, "cleanup done\n");
-}
-
-module_init(s2io_starter);
-module_exit(s2io_closer);
+module_pci_driver(s2io_driver);
static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
struct tcphdr **tcp, struct RxD_t *rxdp,
diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h
index d89b6ed82c51..6c5997dc8afc 100644
--- a/drivers/net/ethernet/neterion/s2io.h
+++ b/drivers/net/ethernet/neterion/s2io.h
@@ -1085,8 +1085,6 @@ static void s2io_txpic_intr_handle(struct s2io_nic *sp);
static void tx_intr_handler(struct fifo_info *fifo_data);
static void s2io_handle_errors(void * dev_id);
-static int s2io_starter(void);
-static void s2io_closer(void);
static void s2io_tx_watchdog(struct net_device *dev);
static void s2io_set_multicast(struct net_device *dev);
static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp);
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index 2d8578cade03..c0051673c9fa 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -1818,6 +1818,30 @@ rocker_cmd_set_port_settings_macaddr_prep(const struct rocker_port *rocker_port,
}
static int
+rocker_cmd_set_port_settings_mtu_prep(const struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ int mtu = *(int *)priv;
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT,
+ rocker_port->pport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MTU,
+ mtu))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+ return 0;
+}
+
+static int
rocker_cmd_set_port_learning_prep(const struct rocker_port *rocker_port,
struct rocker_desc_info *desc_info,
void *priv)
@@ -1874,6 +1898,14 @@ static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port,
macaddr, NULL, NULL);
}
+static int rocker_cmd_set_port_settings_mtu(struct rocker_port *rocker_port,
+ int mtu)
+{
+ return rocker_cmd_exec(rocker_port, SWITCHDEV_TRANS_NONE, 0,
+ rocker_cmd_set_port_settings_mtu_prep,
+ &mtu, NULL, NULL);
+}
+
static int rocker_port_set_learning(struct rocker_port *rocker_port,
enum switchdev_trans trans)
{
@@ -4152,6 +4184,34 @@ static int rocker_port_set_mac_address(struct net_device *dev, void *p)
return 0;
}
+static int rocker_port_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ int running = netif_running(dev);
+ int err;
+
+#define ROCKER_PORT_MIN_MTU 68
+#define ROCKER_PORT_MAX_MTU 9000
+
+ if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU)
+ return -EINVAL;
+
+ if (running)
+ rocker_port_stop(dev);
+
+ netdev_info(dev, "MTU change from %d to %d\n", dev->mtu, new_mtu);
+ dev->mtu = new_mtu;
+
+ err = rocker_cmd_set_port_settings_mtu(rocker_port, new_mtu);
+ if (err)
+ return err;
+
+ if (running)
+ err = rocker_port_open(dev);
+
+ return err;
+}
+
static int rocker_port_get_phys_port_name(struct net_device *dev,
char *buf, size_t len)
{
@@ -4172,6 +4232,7 @@ static const struct net_device_ops rocker_port_netdev_ops = {
.ndo_stop = rocker_port_stop,
.ndo_start_xmit = rocker_port_xmit,
.ndo_set_mac_address = rocker_port_set_mac_address,
+ .ndo_change_mtu = rocker_port_change_mtu,
.ndo_bridge_getlink = switchdev_port_bridge_getlink,
.ndo_bridge_setlink = switchdev_port_bridge_setlink,
.ndo_bridge_dellink = switchdev_port_bridge_dellink,
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index c61fbf968036..08b2c3d96188 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -159,6 +159,7 @@ enum {
ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */
ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */
ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, /* binary */
+ ROCKER_TLV_CMD_PORT_SETTINGS_MTU, /* u16 */
__ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
ROCKER_TLV_CMD_PORT_SETTINGS_MAX =
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 5ec4ed3f6c8d..3e47202b9010 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -2142,7 +2142,6 @@ MODULE_DEVICE_TABLE(of, of_match);
static struct platform_driver netcp_driver = {
.driver = {
.name = "netcp-1.0",
- .owner = THIS_MODULE,
.of_match_table = of_match,
},
.probe = netcp_probe,
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index dd4544085db3..26cd14ccf4d5 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -589,6 +589,7 @@ struct nvsp_message {
#define NETVSC_MTU 65536
+#define NETVSC_MTU_MIN 68
#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */
#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 358475ed9b59..b855ba9a507d 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -743,8 +743,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
limit = NETVSC_MTU - ETH_HLEN;
- /* Hyper-V hosts don't support MTU < ETH_DATA_LEN (1500) */
- if (mtu < ETH_DATA_LEN || mtu > limit)
+ if (mtu < NETVSC_MTU_MIN || mtu > limit)
return -EINVAL;
nvdev->start_remove = true;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 236aeb76ef22..2e40417a8087 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1054,7 +1054,7 @@ int rndis_filter_device_add(struct hv_device *dev,
ret = rndis_filter_query_device(rndis_device,
RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
&mtu, &size);
- if (ret == 0 && size == sizeof(u32))
+ if (ret == 0 && size == sizeof(u32) && mtu < net_device->ndev->mtu)
net_device->ndev->mtu = mtu;
/* Get the mac address */
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 94570aace241..cc56fac3c3f8 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -38,69 +38,68 @@
#include <net/net_namespace.h>
#define TX_Q_LIMIT 32
-struct ifb_private {
+struct ifb_q_private {
+ struct net_device *dev;
struct tasklet_struct ifb_tasklet;
- int tasklet_pending;
-
- struct u64_stats_sync rsync;
+ int tasklet_pending;
+ int txqnum;
struct sk_buff_head rq;
- u64 rx_packets;
- u64 rx_bytes;
+ u64 rx_packets;
+ u64 rx_bytes;
+ struct u64_stats_sync rsync;
struct u64_stats_sync tsync;
+ u64 tx_packets;
+ u64 tx_bytes;
struct sk_buff_head tq;
- u64 tx_packets;
- u64 tx_bytes;
-};
+} ____cacheline_aligned_in_smp;
-static int numifbs = 2;
+struct ifb_dev_private {
+ struct ifb_q_private *tx_private;
+};
-static void ri_tasklet(unsigned long dev);
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
static int ifb_open(struct net_device *dev);
static int ifb_close(struct net_device *dev);
-static void ri_tasklet(unsigned long dev)
+static void ifb_ri_tasklet(unsigned long _txp)
{
- struct net_device *_dev = (struct net_device *)dev;
- struct ifb_private *dp = netdev_priv(_dev);
+ struct ifb_q_private *txp = (struct ifb_q_private *)_txp;
struct netdev_queue *txq;
struct sk_buff *skb;
- txq = netdev_get_tx_queue(_dev, 0);
- if ((skb = skb_peek(&dp->tq)) == NULL) {
- if (__netif_tx_trylock(txq)) {
- skb_queue_splice_tail_init(&dp->rq, &dp->tq);
- __netif_tx_unlock(txq);
- } else {
- /* reschedule */
+ txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
+ skb = skb_peek(&txp->tq);
+ if (!skb) {
+ if (!__netif_tx_trylock(txq))
goto resched;
- }
+ skb_queue_splice_tail_init(&txp->rq, &txp->tq);
+ __netif_tx_unlock(txq);
}
- while ((skb = __skb_dequeue(&dp->tq)) != NULL) {
+ while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
u32 from = G_TC_FROM(skb->tc_verd);
skb->tc_verd = 0;
skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
- u64_stats_update_begin(&dp->tsync);
- dp->tx_packets++;
- dp->tx_bytes += skb->len;
- u64_stats_update_end(&dp->tsync);
+ u64_stats_update_begin(&txp->tsync);
+ txp->tx_packets++;
+ txp->tx_bytes += skb->len;
+ u64_stats_update_end(&txp->tsync);
rcu_read_lock();
- skb->dev = dev_get_by_index_rcu(dev_net(_dev), skb->skb_iif);
+ skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
if (!skb->dev) {
rcu_read_unlock();
dev_kfree_skb(skb);
- _dev->stats.tx_dropped++;
- if (skb_queue_len(&dp->tq) != 0)
+ txp->dev->stats.tx_dropped++;
+ if (skb_queue_len(&txp->tq) != 0)
goto resched;
break;
}
rcu_read_unlock();
- skb->skb_iif = _dev->ifindex;
+ skb->skb_iif = txp->dev->ifindex;
if (from & AT_EGRESS) {
dev_queue_xmit(skb);
@@ -112,10 +111,11 @@ static void ri_tasklet(unsigned long dev)
}
if (__netif_tx_trylock(txq)) {
- if ((skb = skb_peek(&dp->rq)) == NULL) {
- dp->tasklet_pending = 0;
- if (netif_queue_stopped(_dev))
- netif_wake_queue(_dev);
+ skb = skb_peek(&txp->rq);
+ if (!skb) {
+ txp->tasklet_pending = 0;
+ if (netif_tx_queue_stopped(txq))
+ netif_tx_wake_queue(txq);
} else {
__netif_tx_unlock(txq);
goto resched;
@@ -123,8 +123,8 @@ static void ri_tasklet(unsigned long dev)
__netif_tx_unlock(txq);
} else {
resched:
- dp->tasklet_pending = 1;
- tasklet_schedule(&dp->ifb_tasklet);
+ txp->tasklet_pending = 1;
+ tasklet_schedule(&txp->ifb_tasklet);
}
}
@@ -132,29 +132,58 @@ resched:
static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
- struct ifb_private *dp = netdev_priv(dev);
+ struct ifb_dev_private *dp = netdev_priv(dev);
+ struct ifb_q_private *txp = dp->tx_private;
unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_irq(&dp->rsync);
- stats->rx_packets = dp->rx_packets;
- stats->rx_bytes = dp->rx_bytes;
- } while (u64_stats_fetch_retry_irq(&dp->rsync, start));
-
- do {
- start = u64_stats_fetch_begin_irq(&dp->tsync);
-
- stats->tx_packets = dp->tx_packets;
- stats->tx_bytes = dp->tx_bytes;
-
- } while (u64_stats_fetch_retry_irq(&dp->tsync, start));
-
+ u64 packets, bytes;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++,txp++) {
+ do {
+ start = u64_stats_fetch_begin_irq(&txp->rsync);
+ packets = txp->rx_packets;
+ bytes = txp->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&txp->rsync, start));
+ stats->rx_packets += packets;
+ stats->rx_bytes += bytes;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&txp->tsync);
+ packets = txp->tx_packets;
+ bytes = txp->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&txp->tsync, start));
+ stats->tx_packets += packets;
+ stats->tx_bytes += bytes;
+ }
stats->rx_dropped = dev->stats.rx_dropped;
stats->tx_dropped = dev->stats.tx_dropped;
return stats;
}
+static int ifb_dev_init(struct net_device *dev)
+{
+ struct ifb_dev_private *dp = netdev_priv(dev);
+ struct ifb_q_private *txp;
+ int i;
+
+ txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
+ if (!txp)
+ return -ENOMEM;
+ dp->tx_private = txp;
+ for (i = 0; i < dev->num_tx_queues; i++,txp++) {
+ txp->txqnum = i;
+ txp->dev = dev;
+ __skb_queue_head_init(&txp->rq);
+ __skb_queue_head_init(&txp->tq);
+ u64_stats_init(&txp->rsync);
+ u64_stats_init(&txp->tsync);
+ tasklet_init(&txp->ifb_tasklet, ifb_ri_tasklet,
+ (unsigned long)txp);
+ netif_tx_start_queue(netdev_get_tx_queue(dev, i));
+ }
+ return 0;
+}
static const struct net_device_ops ifb_netdev_ops = {
.ndo_open = ifb_open,
@@ -162,6 +191,7 @@ static const struct net_device_ops ifb_netdev_ops = {
.ndo_get_stats64 = ifb_stats64,
.ndo_start_xmit = ifb_xmit,
.ndo_validate_addr = eth_validate_addr,
+ .ndo_init = ifb_dev_init,
};
#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \
@@ -169,10 +199,24 @@ static const struct net_device_ops ifb_netdev_ops = {
NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | \
NETIF_F_HW_VLAN_STAG_TX)
+static void ifb_dev_free(struct net_device *dev)
+{
+ struct ifb_dev_private *dp = netdev_priv(dev);
+ struct ifb_q_private *txp = dp->tx_private;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++,txp++) {
+ tasklet_kill(&txp->ifb_tasklet);
+ __skb_queue_purge(&txp->rq);
+ __skb_queue_purge(&txp->tq);
+ }
+ kfree(dp->tx_private);
+ free_netdev(dev);
+}
+
static void ifb_setup(struct net_device *dev)
{
/* Initialize the device structure. */
- dev->destructor = free_netdev;
dev->netdev_ops = &ifb_netdev_ops;
/* Fill in device structure with ethernet-generic values. */
@@ -188,17 +232,19 @@ static void ifb_setup(struct net_device *dev)
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
netif_keep_dst(dev);
eth_hw_addr_random(dev);
+ dev->destructor = ifb_dev_free;
}
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ifb_private *dp = netdev_priv(dev);
+ struct ifb_dev_private *dp = netdev_priv(dev);
u32 from = G_TC_FROM(skb->tc_verd);
+ struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
- u64_stats_update_begin(&dp->rsync);
- dp->rx_packets++;
- dp->rx_bytes += skb->len;
- u64_stats_update_end(&dp->rsync);
+ u64_stats_update_begin(&txp->rsync);
+ txp->rx_packets++;
+ txp->rx_bytes += skb->len;
+ u64_stats_update_end(&txp->rsync);
if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
dev_kfree_skb(skb);
@@ -206,14 +252,13 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
- if (skb_queue_len(&dp->rq) >= dev->tx_queue_len) {
- netif_stop_queue(dev);
- }
+ if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
+ netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
- __skb_queue_tail(&dp->rq, skb);
- if (!dp->tasklet_pending) {
- dp->tasklet_pending = 1;
- tasklet_schedule(&dp->ifb_tasklet);
+ __skb_queue_tail(&txp->rq, skb);
+ if (!txp->tasklet_pending) {
+ txp->tasklet_pending = 1;
+ tasklet_schedule(&txp->ifb_tasklet);
}
return NETDEV_TX_OK;
@@ -221,24 +266,13 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
static int ifb_close(struct net_device *dev)
{
- struct ifb_private *dp = netdev_priv(dev);
-
- tasklet_kill(&dp->ifb_tasklet);
- netif_stop_queue(dev);
- __skb_queue_purge(&dp->rq);
- __skb_queue_purge(&dp->tq);
+ netif_tx_stop_all_queues(dev);
return 0;
}
static int ifb_open(struct net_device *dev)
{
- struct ifb_private *dp = netdev_priv(dev);
-
- tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev);
- __skb_queue_head_init(&dp->rq);
- __skb_queue_head_init(&dp->tq);
- netif_start_queue(dev);
-
+ netif_tx_start_all_queues(dev);
return 0;
}
@@ -255,31 +289,30 @@ static int ifb_validate(struct nlattr *tb[], struct nlattr *data[])
static struct rtnl_link_ops ifb_link_ops __read_mostly = {
.kind = "ifb",
- .priv_size = sizeof(struct ifb_private),
+ .priv_size = sizeof(struct ifb_dev_private),
.setup = ifb_setup,
.validate = ifb_validate,
};
-/* Number of ifb devices to be set up by this module. */
+/* Number of ifb devices to be set up by this module.
+ * Note that these legacy devices have one queue.
+ * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
+ */
+static int numifbs = 2;
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");
static int __init ifb_init_one(int index)
{
struct net_device *dev_ifb;
- struct ifb_private *dp;
int err;
- dev_ifb = alloc_netdev(sizeof(struct ifb_private), "ifb%d",
+ dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
NET_NAME_UNKNOWN, ifb_setup);
if (!dev_ifb)
return -ENOMEM;
- dp = netdev_priv(dev_ifb);
- u64_stats_init(&dp->rsync);
- u64_stats_init(&dp->tsync);
-
dev_ifb->rtnl_link_ops = &ifb_link_ops;
err = register_netdevice(dev_ifb);
if (err < 0)
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index f721444c2b0a..3320a179ee36 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -48,6 +48,8 @@
#define MII_M1011_IMASK_CLEAR 0x0000
#define MII_M1011_PHY_SCR 0x10
+#define MII_M1011_PHY_SCR_MDI 0x0000
+#define MII_M1011_PHY_SCR_MDI_X 0x0020
#define MII_M1011_PHY_SCR_AUTO_CROSS 0x0060
#define MII_M1145_PHY_EXT_SR 0x1b
@@ -159,6 +161,43 @@ static int marvell_config_intr(struct phy_device *phydev)
return err;
}
+static int marvell_set_polarity(struct phy_device *phydev, int polarity)
+{
+ int reg;
+ int err;
+ int val;
+
+ /* get the current settings */
+ reg = phy_read(phydev, MII_M1011_PHY_SCR);
+ if (reg < 0)
+ return reg;
+
+ val = reg;
+ val &= ~MII_M1011_PHY_SCR_AUTO_CROSS;
+ switch (polarity) {
+ case ETH_TP_MDI:
+ val |= MII_M1011_PHY_SCR_MDI;
+ break;
+ case ETH_TP_MDI_X:
+ val |= MII_M1011_PHY_SCR_MDI_X;
+ break;
+ case ETH_TP_MDI_AUTO:
+ case ETH_TP_MDI_INVALID:
+ default:
+ val |= MII_M1011_PHY_SCR_AUTO_CROSS;
+ break;
+ }
+
+ if (val != reg) {
+ /* Set the new polarity value in the register */
+ err = phy_write(phydev, MII_M1011_PHY_SCR, val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int marvell_config_aneg(struct phy_device *phydev)
{
int err;
@@ -191,8 +230,7 @@ static int marvell_config_aneg(struct phy_device *phydev)
if (err < 0)
return err;
- err = phy_write(phydev, MII_M1011_PHY_SCR,
- MII_M1011_PHY_SCR_AUTO_CROSS);
+ err = marvell_set_polarity(phydev, phydev->mdix);
if (err < 0)
return err;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index b2197b506acb..84b1fba58ac3 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -353,6 +353,8 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd)
phydev->duplex = cmd->duplex;
+ phydev->mdix = cmd->eth_tp_mdix_ctrl;
+
/* Restart the PHY */
phy_start_aneg(phydev);
@@ -377,6 +379,7 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd)
cmd->transceiver = phy_is_internal(phydev) ?
XCVR_INTERNAL : XCVR_EXTERNAL;
cmd->autoneg = phydev->autoneg;
+ cmd->eth_tp_mdix_ctrl = phydev->mdix;
return 0;
}
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 8a495b318b6f..c6cb85a85c89 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -325,9 +325,6 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
queue->pending_prod + queue->pending_cons;
}
-/* Callback from stack when TX packet can be released */
-void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
-
irqreturn_t xenvif_interrupt(int irq, void *dev_id);
extern bool separate_tx_rx_irq;
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 82806c60aa42..1319a6bb6b82 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -94,7 +94,6 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
struct inet6_skb_parm {
int iif;
__be16 ra;
- __u16 hop;
__u16 dst0;
__u16 srcrt;
__u16 dst1;
@@ -111,6 +110,7 @@ struct inet6_skb_parm {
#define IP6SKB_REROUTED 4
#define IP6SKB_ROUTERALERT 8
#define IP6SKB_FRAGMENTED 16
+#define IP6SKB_HOPBYHOP 32
};
#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
diff --git a/include/linux/phy.h b/include/linux/phy.h
index a26c3f84b8dd..e5fb1d415961 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -424,6 +424,8 @@ struct phy_device {
struct net_device *attached_dev;
+ u8 mdix;
+
void (*adjust_link)(struct net_device *dev);
};
#define to_phy_device(d) container_of(d, struct phy_device, dev)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 3ee4c92afd1b..8d2a707a9e87 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -21,6 +21,8 @@ struct tcf_common {
struct gnet_stats_rate_est64 tcfc_rate_est;
spinlock_t tcfc_lock;
struct rcu_head tcfc_rcu;
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ struct gnet_stats_queue __percpu *cpu_qstats;
};
#define tcf_head common.tcfc_head
#define tcf_index common.tcfc_index
@@ -68,6 +70,17 @@ static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf)
kfree(hf->htab);
}
+/* Update lastuse only if needed, to avoid dirtying a cache line.
+ * We use a temp variable to avoid fetching jiffies twice.
+ */
+static inline void tcf_lastuse_update(struct tcf_t *tm)
+{
+ unsigned long now = jiffies;
+
+ if (tm->lastuse != now)
+ tm->lastuse = now;
+}
+
#ifdef CONFIG_NET_CLS_ACT
#define ACT_P_CREATED 1
@@ -103,7 +116,7 @@ int tcf_hash_release(struct tc_action *a, int bind);
u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo);
int tcf_hash_check(u32 index, struct tc_action *a, int bind);
int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
- int size, int bind);
+ int size, int bind, bool cpustats);
void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);
void tcf_hash_insert(struct tc_action *a);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index b73c88a19dd4..b07d126694a7 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk);
void inet_hashinfo_init(struct inet_hashinfo *h);
-int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw);
-int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw);
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
+void __inet_hash(struct sock *sk, struct sock *osk);
void inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 360c4802288d..879d6e5a973b 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -100,10 +100,8 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
void inet_twsk_free(struct inet_timewait_sock *tw);
void inet_twsk_put(struct inet_timewait_sock *tw);
-int inet_twsk_unhash(struct inet_timewait_sock *tw);
-
-int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
- struct inet_hashinfo *hashinfo);
+void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+ struct inet_hashinfo *hashinfo);
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
struct inet_timewait_death_row *dr,
@@ -113,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
struct inet_hashinfo *hashinfo);
void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo);
-void inet_twsk_deschedule(struct inet_timewait_sock *tw);
+void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
void inet_twsk_purge(struct inet_hashinfo *hashinfo,
struct inet_timewait_death_row *twdr, int family);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 8d93544a2d2b..c0368db6df54 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -31,6 +31,7 @@ struct netns_sysctl_ipv6 {
int auto_flowlabels;
int icmpv6_time;
int anycast_src_echo_reply;
+ int ip_nonlocal_bind;
int fwmark_reflect;
int idgen_retries;
int idgen_delay;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2738f6f87908..2eab08c38e32 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -513,17 +513,20 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
}
-static inline void qdisc_bstats_update_cpu(struct Qdisc *sch,
- const struct sk_buff *skb)
+static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+ const struct sk_buff *skb)
{
- struct gnet_stats_basic_cpu *bstats =
- this_cpu_ptr(sch->cpu_bstats);
-
u64_stats_update_begin(&bstats->syncp);
bstats_update(&bstats->bstats, skb);
u64_stats_update_end(&bstats->syncp);
}
+static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
+ const struct sk_buff *skb)
+{
+ bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
+}
+
static inline void qdisc_bstats_update(struct Qdisc *sch,
const struct sk_buff *skb)
{
@@ -547,16 +550,24 @@ static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
sch->qstats.drops += count;
}
-static inline void qdisc_qstats_drop(struct Qdisc *sch)
+static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
{
- sch->qstats.drops++;
+ qstats->drops++;
}
-static inline void qdisc_qstats_drop_cpu(struct Qdisc *sch)
+static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
{
- struct gnet_stats_queue *qstats = this_cpu_ptr(sch->cpu_qstats);
+ qstats->overlimits++;
+}
- qstats->drops++;
+static inline void qdisc_qstats_drop(struct Qdisc *sch)
+{
+ qstats_drop_inc(&sch->qstats);
+}
+
+static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
+{
+ qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats));
}
static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 9fc9b578908a..592a6bc02b0b 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -6,9 +6,10 @@
struct tcf_gact {
struct tcf_common common;
#ifdef CONFIG_GACT_PROB
- u16 tcfg_ptype;
- u16 tcfg_pval;
- int tcfg_paction;
+ u16 tcfg_ptype;
+ u16 tcfg_pval;
+ int tcfg_paction;
+ atomic_t packets;
#endif
};
#define to_gact(a) \
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 4dd77a1c106b..dae96bae1c19 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -8,7 +8,7 @@ struct tcf_mirred {
int tcfm_eaction;
int tcfm_ifindex;
int tcfm_ok_push;
- struct net_device *tcfm_dev;
+ struct net_device __rcu *tcfm_dev;
struct list_head tcfm_list;
};
#define to_mirred(a) \
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 950cfecaad3c..364426a2be5a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -989,6 +989,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
#define TCP_INFINITE_SSTHRESH 0x7fffffff
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+ return tp->snd_cwnd < tp->snd_ssthresh;
+}
+
static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
{
return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
@@ -1065,7 +1070,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
/* If in slow start, ensure cwnd grows to twice what was ACKed. */
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
return tp->snd_cwnd < 2 * tp->max_packets_out;
return tp->is_cwnd_limited;
diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h
index 68f0ecad6c6e..1a47946f95ba 100644
--- a/include/net/timewait_sock.h
+++ b/include/net/timewait_sock.h
@@ -33,9 +33,6 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
static inline void twsk_destructor(struct sock *sk)
{
- BUG_ON(sk == NULL);
- BUG_ON(sk->sk_prot == NULL);
- BUG_ON(sk->sk_prot->twsk_prot == NULL);
if (sk->sk_prot->twsk_prot->twsk_destructor != NULL)
sk->sk_prot->twsk_prot->twsk_destructor(sk);
}
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index eaaea6208b42..3635b7797508 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -182,6 +182,7 @@ struct br_mdb_entry {
#define MDB_TEMPORARY 0
#define MDB_PERMANENT 1
__u8 state;
+ __u16 vid;
struct {
union {
__be32 ip4;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c5bedc82bc1c..bf38f5e8196c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -453,7 +453,11 @@ select_insn:
if (unlikely(!prog))
goto out;
- ARG1 = BPF_R1;
+ /* ARG1 at this point is guaranteed to point to CTX from
+ * the verifier side due to the fact that the tail call is
+ * handeled like a helper, that is, bpf_tail_call_proto,
+ * where arg1_type is ARG_PTR_TO_CTX.
+ */
insn = prog->insnsi;
goto select_insn;
out:
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 7f58c735d745..9198f28a5528 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -3674,6 +3674,9 @@ static struct bpf_test tests[] = {
.u.insns_int = {
BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
BPF_ENDIAN(BPF_FROM_BE, R0, 32),
+ BPF_ALU64_REG(BPF_MOV, R1, R0),
+ BPF_ALU64_IMM(BPF_RSH, R1, 32),
+ BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
BPF_EXIT_INSN(),
},
INTERNAL,
@@ -3708,6 +3711,9 @@ static struct bpf_test tests[] = {
.u.insns_int = {
BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
BPF_ENDIAN(BPF_FROM_LE, R0, 32),
+ BPF_ALU64_REG(BPF_MOV, R1, R0),
+ BPF_ALU64_IMM(BPF_RSH, R1, 32),
+ BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
BPF_EXIT_INSN(),
},
INTERNAL,
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index c11cf2611db0..9f7cdd27b762 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -85,6 +85,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
memset(&e, 0, sizeof(e));
e.ifindex = port->dev->ifindex;
e.state = p->state;
+ e.vid = p->addr.vid;
if (p->addr.proto == htons(ETH_P_IP))
e.addr.u.ip4 = p->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
@@ -230,7 +231,7 @@ errout:
}
void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type)
+ struct br_ip *group, int type, u8 state)
{
struct br_mdb_entry entry;
@@ -241,6 +242,8 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
#if IS_ENABLED(CONFIG_IPV6)
entry.addr.u.ip6 = group->u.ip6;
#endif
+ entry.state = state;
+ entry.vid = group->vid;
__br_mdb_notify(dev, &entry, type);
}
@@ -263,6 +266,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
return false;
if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
return false;
+ if (entry->vid >= VLAN_VID_MASK)
+ return false;
return true;
}
@@ -351,7 +356,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
if (state == MDB_TEMPORARY)
mod_timer(&p->timer, now + br->multicast_membership_interval);
- br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
+ br_mdb_notify(br->dev, port, group, RTM_NEWMDB, state);
return 0;
}
@@ -375,6 +380,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
return -EINVAL;
memset(&ip, 0, sizeof(ip));
+ ip.vid = entry->vid;
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
@@ -422,6 +428,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
return -EINVAL;
memset(&ip, 0, sizeof(ip));
+ ip.vid = entry->vid;
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 742a6c27d7a2..5a44cd9473f2 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -694,7 +694,7 @@ static int br_multicast_add_group(struct net_bridge *br,
if (unlikely(!p))
goto err;
rcu_assign_pointer(*pp, p);
- br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
+ br_mdb_notify(br->dev, port, group, RTM_NEWMDB, MDB_TEMPORARY);
found:
mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -1439,8 +1439,9 @@ br_multicast_leave_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
+ br_mdb_notify(br->dev, port, group, RTM_DELMDB,
+ p->state);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- br_mdb_notify(br->dev, port, group, RTM_DELMDB);
if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8b21146b24a0..c73fd785654d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -488,7 +488,7 @@ br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
void br_mdb_init(void);
void br_mdb_uninit(void);
void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type);
+ struct br_ip *group, int type, u8 state);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
diff --git a/net/core/dev.c b/net/core/dev.c
index a8e4dd430285..69445a33ace6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3645,7 +3645,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- qdisc_bstats_update_cpu(cl->q, skb);
+ qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res)) {
case TC_ACT_OK:
@@ -3653,7 +3653,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_drop_cpu(cl->q);
+ qdisc_qstats_cpu_drop(cl->q);
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
kfree_skb(skb);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 43d3dd62fcc8..42689d5c468c 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -60,11 +60,15 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
struct phy_device *phydev;
unsigned int type;
+ if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv)
+ return false;
+
if (skb_headroom(skb) < ETH_HLEN)
return false;
+
__skb_push(skb, ETH_HLEN);
- type = classify(skb);
+ type = ptp_classify_raw(skb);
__skb_pull(skb, ETH_HLEN);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 5f9b063bbe8a..f8b3701a6c3c 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -343,7 +343,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw = NULL;
- int twrefcnt = 0;
spin_lock(lock);
@@ -371,21 +370,17 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
- twrefcnt = inet_twsk_unhash(tw);
+ sk_nulls_del_node_init_rcu((struct sock *)tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
- if (twrefcnt)
- inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
if (twp) {
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw);
-
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
}
return 0;
@@ -403,13 +398,12 @@ static u32 inet_sk_port_offset(const struct sock *sk)
inet->inet_dport);
}
-int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
struct inet_ehash_bucket *head;
spinlock_t *lock;
- int twrefcnt = 0;
WARN_ON(!sk_unhashed(sk));
@@ -420,23 +414,22 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
spin_lock(lock);
__sk_nulls_add_node_rcu(sk, list);
- if (tw) {
- WARN_ON(sk->sk_hash != tw->tw_hash);
- twrefcnt = inet_twsk_unhash(tw);
+ if (osk) {
+ WARN_ON(sk->sk_hash != osk->sk_hash);
+ sk_nulls_del_node_init_rcu(osk);
}
spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- return twrefcnt;
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
-int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
+void __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
if (sk->sk_state != TCP_LISTEN)
- return __inet_hash_nolisten(sk, tw);
+ return __inet_hash_nolisten(sk, osk);
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -445,7 +438,6 @@ int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
spin_unlock(&ilb->lock);
- return 0;
}
EXPORT_SYMBOL(__inet_hash);
@@ -492,7 +484,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb;
int ret;
struct net *net = sock_net(sk);
- int twrefcnt = 1;
if (!snum) {
int i, remaining, low, high, port;
@@ -560,19 +551,14 @@ ok:
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
- twrefcnt += __inet_hash_nolisten(sk, tw);
+ __inet_hash_nolisten(sk, (struct sock *)tw);
}
if (tw)
- twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
+ inet_twsk_bind_unhash(tw, hinfo);
spin_unlock(&head->lock);
- if (tw) {
- inet_twsk_deschedule(tw);
- while (twrefcnt) {
- twrefcnt--;
- inet_twsk_put(tw);
- }
- }
+ if (tw)
+ inet_twsk_deschedule_put(tw);
ret = 0;
goto out;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 2ffbd16b79e0..ae22cc24fbe8 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -18,28 +18,6 @@
/**
- * inet_twsk_unhash - unhash a timewait socket from established hash
- * @tw: timewait socket
- *
- * unhash a timewait socket from established hash, if hashed.
- * ehash lock must be held by caller.
- * Returns 1 if caller should call inet_twsk_put() after lock release.
- */
-int inet_twsk_unhash(struct inet_timewait_sock *tw)
-{
- if (hlist_nulls_unhashed(&tw->tw_node))
- return 0;
-
- hlist_nulls_del_rcu(&tw->tw_node);
- sk_nulls_node_init(&tw->tw_node);
- /*
- * We cannot call inet_twsk_put() ourself under lock,
- * caller must call it for us.
- */
- return 1;
-}
-
-/**
* inet_twsk_bind_unhash - unhash a timewait socket from bind hash
* @tw: timewait socket
* @hashinfo: hashinfo pointer
@@ -48,35 +26,29 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw)
* bind hash lock must be held by caller.
* Returns 1 if caller should call inet_twsk_put() after lock release.
*/
-int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
{
struct inet_bind_bucket *tb = tw->tw_tb;
if (!tb)
- return 0;
+ return;
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
- /*
- * We cannot call inet_twsk_put() ourself under lock,
- * caller must call it for us.
- */
- return 1;
+ __sock_put((struct sock *)tw);
}
/* Must be called with locally disabled BHs. */
static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
- struct inet_bind_hashbucket *bhead;
- int refcnt;
- /* Unlink from established hashes. */
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+ struct inet_bind_hashbucket *bhead;
spin_lock(lock);
- refcnt = inet_twsk_unhash(tw);
+ sk_nulls_del_node_init_rcu((struct sock *)tw);
spin_unlock(lock);
/* Disassociate with bind bucket. */
@@ -84,11 +56,9 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
hashinfo->bhash_size)];
spin_lock(&bhead->lock);
- refcnt += inet_twsk_bind_unhash(tw, hashinfo);
+ inet_twsk_bind_unhash(tw, hashinfo);
spin_unlock(&bhead->lock);
- BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
- atomic_sub(refcnt, &tw->tw_refcnt);
atomic_dec(&tw->tw_dr->tw_count);
inet_twsk_put(tw);
}
@@ -235,13 +205,17 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc);
* tcp_input.c to verify this.
*/
-/* This is for handling early-kills of TIME_WAIT sockets. */
-void inet_twsk_deschedule(struct inet_timewait_sock *tw)
+/* This is for handling early-kills of TIME_WAIT sockets.
+ * Warning : consume reference.
+ * Caller should not access tw anymore.
+ */
+void inet_twsk_deschedule_put(struct inet_timewait_sock *tw)
{
if (del_timer_sync(&tw->tw_timer))
inet_twsk_kill(tw);
+ inet_twsk_put(tw);
}
-EXPORT_SYMBOL(inet_twsk_deschedule);
+EXPORT_SYMBOL(inet_twsk_deschedule_put);
void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
{
@@ -311,9 +285,8 @@ restart:
rcu_read_unlock();
local_bh_disable();
- inet_twsk_deschedule(tw);
+ inet_twsk_deschedule_put(tw);
local_bh_enable();
- inet_twsk_put(tw);
goto restart_rcu;
}
/* If the nulls value we got at the end of this lookup is
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a50dc6d408d1..4d3fffafbe24 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -522,7 +522,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
int len;
int ihlen;
int err;
- int sum_truesize;
u8 ecn;
ipq_kill(qp);
@@ -590,32 +589,19 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
add_frag_mem_limit(&qp->q, clone->truesize);
}
+ skb_shinfo(head)->frag_list = head->next;
skb_push(head, head->data - skb_network_header(head));
- sum_truesize = head->truesize;
- for (fp = head->next; fp;) {
- bool headstolen;
- int delta;
- struct sk_buff *next = fp->next;
-
- sum_truesize += fp->truesize;
+ for (fp=head->next; fp; fp = fp->next) {
+ head->data_len += fp->len;
+ head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
-
- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
- kfree_skb_partial(fp, headstolen);
- } else {
- if (!skb_shinfo(head)->frag_list)
- skb_shinfo(head)->frag_list = fp;
- head->data_len += fp->len;
- head->len += fp->len;
- head->truesize += fp->truesize;
- }
- fp = next;
+ head->truesize += fp->truesize;
}
- sub_frag_mem_limit(&qp->q, sum_truesize);
+ sub_frag_mem_limit(&qp->q, head->truesize);
head->next = NULL;
head->dev = dev;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 05ff44b758df..e89094ab5ddb 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -363,7 +363,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
scoped);
rcu_read_unlock();
- if (!(isk->freebind || isk->transparent || has_addr ||
+ if (!(net->ipv6.sysctl.ip_nonlocal_bind ||
+ isk->freebind || isk->transparent || has_addr ||
addr_type == IPV6_ADDR_ANY))
return -EADDRNOTAVAIL;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d0362a2de3d3..04c83de4f79e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1546,7 +1546,6 @@ static int __mkroute_input(struct sk_buff *skb,
struct rtable *rth;
int err;
struct in_device *out_dev;
- unsigned int flags = 0;
bool do_cache;
u32 itag = 0;
@@ -1610,7 +1609,7 @@ static int __mkroute_input(struct sk_buff *skb,
}
rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
- rth->rt_flags = flags;
+ rth->rt_flags = 0;
rth->rt_type = res->type;
rth->rt_is_input = 1;
rth->rt_iif = 0;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index c037644eafb7..fd1405d37c14 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -146,7 +146,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
bictcp_update(ca, tp->snd_cwnd);
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 8c6fd3d5e40f..167b6a3e1b98 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -264,7 +264,7 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
u32 prior_snd_cwnd;
u32 incr;
- if (tp->snd_cwnd < tp->snd_ssthresh && hystart_detect)
+ if (tcp_in_slow_start(tp) && hystart_detect)
tcp_cdg_hystart_update(sk);
if (after(ack, ca->rtt_seq) && ca->rtt.v64) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 84be008c945c..a2ed23c595cf 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -365,10 +365,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
*/
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
- u32 cwnd = tp->snd_cwnd + acked;
+ u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
- if (cwnd > tp->snd_ssthresh)
- cwnd = tp->snd_ssthresh + 1;
acked -= cwnd - tp->snd_cwnd;
tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
@@ -413,7 +411,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
/* In "safe" area, increase. */
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
acked = tcp_slow_start(tp, acked);
if (!acked)
return;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 06d3d665a9fd..28011fb1f4a2 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -320,7 +320,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
if (hystart && after(ack, ca->end_seq))
bictcp_hystart_reset(sk);
acked = tcp_slow_start(tp, acked);
@@ -439,7 +439,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
ca->delay_min = delay;
/* hystart triggers when cwnd is larger than some threshold */
- if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
+ if (hystart && tcp_in_slow_start(tp) &&
tp->snd_cwnd >= hystart_low_window)
hystart_update(sk, delay);
}
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 882c08aae2f5..db7842495a64 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -116,7 +116,7 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
/* Update AIMD parameters.
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 58469fff6c18..82f0d9ed60f5 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -236,7 +236,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
/* In dangerous area, increase slowly.
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index f963b274f2b0..083831e359df 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -112,7 +112,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
rho_fractions = ca->rho_3ls - (ca->rho << 3);
- if (tp->snd_cwnd < tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
/*
* slow start
* INC = 2^RHO - 1
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index f71002e4db0b..2ab9bbb6faff 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -268,7 +268,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
/* In slow start */
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 684f095d196e..1578fc2a6f39 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -109,6 +109,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
#define FLAG_DATA_SACKED 0x20 /* New SACK. */
#define FLAG_ECE 0x40 /* ECE in this ACK */
+#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -196,11 +197,13 @@ static void tcp_enter_quickack_mode(struct sock *sk)
* and the session is not interactive.
*/
-static inline bool tcp_in_quickack_mode(const struct sock *sk)
+static bool tcp_in_quickack_mode(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct dst_entry *dst = __sk_dst_get(sk);
- return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
+ return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
+ (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
}
static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
@@ -1037,7 +1040,7 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
* highest SACK block). Also calculate the lowest snd_nxt among the remaining
* retransmitted skbs to avoid some costly processing per ACKs.
*/
-static void tcp_mark_lost_retrans(struct sock *sk)
+static void tcp_mark_lost_retrans(struct sock *sk, int *flag)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -1078,7 +1081,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
if (after(received_upto, ack_seq)) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
-
+ *flag |= FLAG_LOST_RETRANS;
tcp_skb_mark_lost_uncond_verify(tp, skb);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
} else {
@@ -1818,7 +1821,7 @@ advance_sp:
((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
- tcp_mark_lost_retrans(sk);
+ tcp_mark_lost_retrans(sk, &state->flag);
tcp_verify_left_out(tp);
out:
@@ -2475,15 +2478,14 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
return false;
}
-/* The cwnd reduction in CWR and Recovery use the PRR algorithm
- * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
+/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
* It computes the number of packets to send (sndcnt) based on packets newly
* delivered:
* 1) If the packets in flight is larger than ssthresh, PRR spreads the
* cwnd reductions across a full RTT.
- * 2) If packets in flight is lower than ssthresh (such as due to excess
- * losses and/or application stalls), do not perform any further cwnd
- * reductions, but instead slow start up to ssthresh.
+ * 2) Otherwise PRR uses packet conservation to send as much as delivered.
+ * But when the retransmits are acked without further losses, PRR
+ * slow starts cwnd up to ssthresh to speed up the recovery.
*/
static void tcp_init_cwnd_reduction(struct sock *sk)
{
@@ -2500,7 +2502,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
}
static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
- int fast_rexmit)
+ int fast_rexmit, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
int sndcnt = 0;
@@ -2509,16 +2511,18 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
(tp->packets_out - tp->sacked_out);
tp->prr_delivered += newly_acked_sacked;
- if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+ if (delta < 0) {
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
tp->prior_cwnd - 1;
sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else {
+ } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
+ !(flag & FLAG_LOST_RETRANS)) {
sndcnt = min_t(int, delta,
max_t(int, tp->prr_delivered - tp->prr_out,
newly_acked_sacked) + 1);
+ } else {
+ sndcnt = min(delta, newly_acked_sacked);
}
-
sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}
@@ -2579,7 +2583,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
} else {
- tcp_cwnd_reduction(sk, prior_unsacked, 0);
+ tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
}
}
@@ -2676,7 +2680,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tp->prior_ssthresh = 0;
tcp_init_undo(tp);
- if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+ if (!tcp_in_cwnd_reduction(sk)) {
if (!ece_ack)
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tcp_init_cwnd_reduction(sk);
@@ -2736,7 +2740,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
/* Undo during fast recovery after partial ACK. */
static bool tcp_try_undo_partial(struct sock *sk, const int acked,
- const int prior_unsacked)
+ const int prior_unsacked, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2752,7 +2756,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
* mark more packets lost or retransmit more.
*/
if (tp->retrans_out) {
- tcp_cwnd_reduction(sk, prior_unsacked, 0);
+ tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
return true;
}
@@ -2839,7 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked, prior_unsacked))
+ if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
@@ -2852,9 +2856,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, is_dupack);
- if (icsk->icsk_ca_state != TCP_CA_Open)
+ if (icsk->icsk_ca_state != TCP_CA_Open &&
+ !(flag & FLAG_LOST_RETRANS))
return;
- /* Fall through to processing in Open state. */
+ /* Change state if cwnd is undone or retransmits are lost */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2889,7 +2894,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
- tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
+ tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag);
tcp_xmit_retransmit_queue(sk);
}
@@ -3563,10 +3568,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
&sack_state);
acked -= tp->packets_out;
- /* Advance cwnd if state allows */
- if (tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, acked);
-
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, prior_unsacked,
@@ -3575,6 +3576,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
+ /* Advance cwnd if state allows */
+ if (tcp_may_raise_cwnd(sk, flag))
+ tcp_cong_avoid(sk, ack, acked);
+
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
struct dst_entry *dst = __sk_dst_get(sk);
if (dst)
@@ -3948,7 +3953,6 @@ void tcp_reset(struct sock *sk)
static void tcp_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- const struct dst_entry *dst;
inet_csk_schedule_ack(sk);
@@ -3960,9 +3964,7 @@ static void tcp_fin(struct sock *sk)
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
- dst = __sk_dst_get(sk);
- if (!dst || !dst_metric(dst, RTAX_QUICKACK))
- inet_csk(sk)->icsk_ack.pingpong = 1;
+ inet_csk(sk)->icsk_ack.pingpong = 1;
break;
case TCP_CLOSE_WAIT:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d7d4c2b79cf2..486ba96ae91a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1683,8 +1683,7 @@ do_time_wait:
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk));
- inet_twsk_put(inet_twsk(sk));
+ inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
goto process;
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index a51d63a43e33..b3d64f61d922 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -461,7 +461,7 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
tp->snd_cwnd);
}
- } else if (tp->snd_cwnd > tp->snd_ssthresh &&
+ } else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4bc00cb79e60..6d8795b066ac 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -147,8 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
if (!th->fin ||
TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
kill_with_rst:
- inet_twsk_deschedule(tw);
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
return TCP_TW_RST;
}
@@ -198,8 +197,7 @@ kill_with_rst:
*/
if (sysctl_tcp_rfc1337 == 0) {
kill:
- inet_twsk_deschedule(tw);
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b1c218df2c85..71057849593a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -163,7 +163,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
{
struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp;
- const struct dst_entry *dst = __sk_dst_get(sk);
if (sysctl_tcp_slow_start_after_idle &&
(!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
@@ -174,9 +173,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
/* If it is a reply for ato after last received
* packet, enter pingpong mode.
*/
- if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato &&
- (!dst || !dst_metric(dst, RTAX_QUICKACK)))
- icsk->icsk_ack.pingpong = 1;
+ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+ icsk->icsk_ack.pingpong = 1;
}
/* Account for an ACK we sent. */
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 333bcb2415ff..bf5ea9e9bbc1 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -22,7 +22,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else
tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5b752f58a900..7149ebc820c7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -649,4 +649,3 @@ void tcp_init_xmit_timers(struct sock *sk)
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
&tcp_keepalive_timer);
}
-EXPORT_SYMBOL(tcp_init_xmit_timers);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a6cea1d5e20d..13951c4087d4 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -225,7 +225,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
*/
diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
- if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (diff > gamma && tcp_in_slow_start(tp)) {
/* Going too fast. Time to slow down
* and switch to congestion avoidance.
*/
@@ -240,7 +240,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
- } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ } else if (tcp_in_slow_start(tp)) {
/* Slow start. */
tcp_slow_start(tp, acked);
} else {
@@ -281,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
vegas->minRTT = 0x7fffffff;
}
/* Use normal slow start */
- else if (tp->snd_cwnd <= tp->snd_ssthresh)
+ else if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 112151eeee45..0d094b995cd9 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -150,7 +150,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
/* Slow start. */
tcp_slow_start(tp, acked);
} else {
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 438a73aa777c..643f61339e7b 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -5,16 +5,15 @@
# IPv6 as module will cause a CRASH if you try to unload it
menuconfig IPV6
tristate "The IPv6 protocol"
- default m
+ default y
---help---
- This is complemental support for the IP version 6.
- You will still be able to do traditional IPv4 networking as well.
+ Support for IP version 6 (IPv6).
For general information about IPv6, see
<https://en.wikipedia.org/wiki/IPv6>.
- For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
- For specific information about IPv6 under Linux, read the HOWTO at
- <http://www.bieringer.de/linux/IPv6/>.
+ For specific information about IPv6 under Linux, see
+ Documentation/networking/ipv6.txt and read the HOWTO at
+ <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
To compile this protocol support as a module, choose M here: the
module will be called ipv6.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 21c2c818df3b..4ab74d56f65a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1358,15 +1358,94 @@ out:
return ret;
}
+static void __ipv6_dev_get_saddr(struct net *net,
+ struct ipv6_saddr_dst *dst,
+ unsigned int prefs,
+ const struct in6_addr *saddr,
+ struct inet6_dev *idev,
+ struct ipv6_saddr_score *scores)
+{
+ struct ipv6_saddr_score *score = &scores[0], *hiscore = &scores[1];
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+ int i;
+
+ /*
+ * - Tentative Address (RFC2462 section 5.4)
+ * - A tentative address is not considered
+ * "assigned to an interface" in the traditional
+ * sense, unless it is also flagged as optimistic.
+ * - Candidate Source Address (section 4)
+ * - In any case, anycast addresses, multicast
+ * addresses, and the unspecified address MUST
+ * NOT be included in a candidate set.
+ */
+ if ((score->ifa->flags & IFA_F_TENTATIVE) &&
+ (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
+ continue;
+
+ score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+
+ if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
+ score->addr_type & IPV6_ADDR_MULTICAST)) {
+ net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
+ idev->dev->name);
+ continue;
+ }
+
+ score->rule = -1;
+ bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
+
+ for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
+ int minihiscore, miniscore;
+
+ minihiscore = ipv6_get_saddr_eval(net, hiscore, dst, i);
+ miniscore = ipv6_get_saddr_eval(net, score, dst, i);
+
+ if (minihiscore > miniscore) {
+ if (i == IPV6_SADDR_RULE_SCOPE &&
+ score->scopedist > 0) {
+ /*
+ * special case:
+ * each remaining entry
+ * has too small (not enough)
+ * scope, because ifa entries
+ * are sorted by their scope
+ * values.
+ */
+ goto out;
+ }
+ break;
+ } else if (minihiscore < miniscore) {
+ if (hiscore->ifa)
+ in6_ifa_put(hiscore->ifa);
+
+ in6_ifa_hold(score->ifa);
+
+ swap(hiscore, score);
+
+ /* restore our iterator */
+ score->ifa = hiscore->ifa;
+
+ break;
+ }
+ }
+ }
+out:
+ read_unlock_bh(&idev->lock);
+}
+
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
{
- struct ipv6_saddr_score scores[2],
- *score = &scores[0], *hiscore = &scores[1];
+ struct ipv6_saddr_score scores[2], *hiscore = &scores[1];
struct ipv6_saddr_dst dst;
+ struct inet6_dev *idev;
struct net_device *dev;
int dst_type;
+ bool use_oif_addr = false;
dst_type = __ipv6_addr_type(daddr);
dst.addr = daddr;
@@ -1380,97 +1459,35 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- struct inet6_dev *idev;
-
- /* Candidate Source Address (section 4)
- * - multicast and link-local destination address,
- * the set of candidate source address MUST only
- * include addresses assigned to interfaces
- * belonging to the same link as the outgoing
- * interface.
- * (- For site-local destination addresses, the
- * set of candidate source addresses MUST only
- * include addresses assigned to interfaces
- * belonging to the same site as the outgoing
- * interface.)
- */
- if (((dst_type & IPV6_ADDR_MULTICAST) ||
- dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
- dst.ifindex && dev->ifindex != dst.ifindex)
- continue;
-
- idev = __in6_dev_get(dev);
- if (!idev)
- continue;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
- int i;
-
- /*
- * - Tentative Address (RFC2462 section 5.4)
- * - A tentative address is not considered
- * "assigned to an interface" in the traditional
- * sense, unless it is also flagged as optimistic.
- * - Candidate Source Address (section 4)
- * - In any case, anycast addresses, multicast
- * addresses, and the unspecified address MUST
- * NOT be included in a candidate set.
- */
- if ((score->ifa->flags & IFA_F_TENTATIVE) &&
- (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
- continue;
-
- score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+ /* Candidate Source Address (section 4)
+ * - multicast and link-local destination address,
+ * the set of candidate source address MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same link as the outgoing
+ * interface.
+ * (- For site-local destination addresses, the
+ * set of candidate source addresses MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same site as the outgoing
+ * interface.)
+ */
+ if (dst_dev) {
+ if ((dst_type & IPV6_ADDR_MULTICAST) ||
+ dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) {
+ idev = __in6_dev_get(dst_dev);
+ use_oif_addr = true;
+ }
+ }
- if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
- score->addr_type & IPV6_ADDR_MULTICAST)) {
- net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
- dev->name);
+ if (use_oif_addr) {
+ __ipv6_dev_get_saddr(net, &dst, prefs, saddr, idev, scores);
+ } else {
+ for_each_netdev_rcu(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
continue;
- }
-
- score->rule = -1;
- bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
-
- for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
- int minihiscore, miniscore;
-
- minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
- miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
-
- if (minihiscore > miniscore) {
- if (i == IPV6_SADDR_RULE_SCOPE &&
- score->scopedist > 0) {
- /*
- * special case:
- * each remaining entry
- * has too small (not enough)
- * scope, because ifa entries
- * are sorted by their scope
- * values.
- */
- goto try_nextdev;
- }
- break;
- } else if (minihiscore < miniscore) {
- if (hiscore->ifa)
- in6_ifa_put(hiscore->ifa);
-
- in6_ifa_hold(score->ifa);
-
- swap(hiscore, score);
-
- /* restore our iterator */
- score->ifa = hiscore->ifa;
-
- break;
- }
- }
+ __ipv6_dev_get_saddr(net, &dst, prefs, saddr, idev, scores);
}
-try_nextdev:
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7de52b65173f..7bc92ea4ae8f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -342,7 +342,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
- if (!(inet->freebind || inet->transparent) &&
+ if (!net->ipv6.sysctl.ip_nonlocal_bind &&
+ !(inet->freebind || inet->transparent) &&
!ipv6_chk_addr(net, &addr->sin6_addr,
dev, 0)) {
err = -EADDRNOTAVAIL;
@@ -679,8 +680,8 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
const struct ipv6_pinfo *np = inet6_sk(sk);
if (np->rxopt.all) {
- if ((opt->hop && (np->rxopt.bits.hopopts ||
- np->rxopt.bits.ohopopts)) ||
+ if (((opt->flags & IP6SKB_HOPBYHOP) &&
+ (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
(ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
np->rxopt.bits.rxflow) ||
(opt->srcrt && (np->rxopt.bits.srcrt ||
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 62d908e64eeb..50115522e80f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -558,8 +558,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
}
/* HbH is allowed only once */
- if (np->rxopt.bits.hopopts && opt->hop) {
- u8 *ptr = nh + opt->hop;
+ if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) {
+ u8 *ptr = nh + sizeof(struct ipv6hdr);
put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
}
@@ -620,8 +620,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
int hlim = ipv6_hdr(skb)->hop_limit;
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
- if (np->rxopt.bits.ohopopts && opt->hop) {
- u8 *ptr = nh + opt->hop;
+ if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) {
+ u8 *ptr = nh + sizeof(struct ipv6hdr);
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
}
if (np->rxopt.bits.odstopts && opt->dst0) {
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index a7bbbe45570b..ce203b0402be 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -632,7 +632,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
return -1;
}
- opt->hop = sizeof(struct ipv6hdr);
+ opt->flags |= IP6SKB_HOPBYHOP;
if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b4fd96de97e6..6ac8dad0138a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -207,7 +207,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw = NULL;
- int twrefcnt = 0;
spin_lock(lock);
@@ -234,21 +233,17 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
- twrefcnt = inet_twsk_unhash(tw);
+ sk_nulls_del_node_init_rcu((struct sock *)tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
- if (twrefcnt)
- inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
if (twp) {
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw);
-
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
}
return 0;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ca4700cb26c4..fdbada1569a3 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -295,7 +295,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* unspecified and mapped address have a v4 equivalent.
*/
v4addr = LOOPBACK4_IPV6;
- if (!(addr_type & IPV6_ADDR_MULTICAST)) {
+ if (!(addr_type & IPV6_ADDR_MULTICAST) &&
+ !sock_net(sk)->ipv6.sysctl.ip_nonlocal_bind) {
err = -EADDRNOTAVAIL;
if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
dev, 0)) {
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 4e705add4f18..db48aebd9c47 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -75,6 +75,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "ip_nonlocal_bind",
+ .data = &init_net.ipv6.sysctl.ip_nonlocal_bind,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -117,6 +124,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
+ ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6748c4277aff..d540846a1a79 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1481,8 +1481,7 @@ do_time_wait:
ntohs(th->dest), tcp_v6_iif(skb));
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw);
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
goto process;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index cca96cec1b68..d0c96c5ae29a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -272,8 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk));
- inet_twsk_put(inet_twsk(sk));
+ inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
@@ -437,8 +436,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk));
- inet_twsk_put(inet_twsk(sk));
+ inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index af427a3dbcba..074a32f466f8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -27,6 +27,15 @@
#include <net/act_api.h>
#include <net/netlink.h>
+static void free_tcf(struct rcu_head *head)
+{
+ struct tcf_common *p = container_of(head, struct tcf_common, tcfc_rcu);
+
+ free_percpu(p->cpu_bstats);
+ free_percpu(p->cpu_qstats);
+ kfree(p);
+}
+
void tcf_hash_destroy(struct tc_action *a)
{
struct tcf_common *p = a->priv;
@@ -41,7 +50,7 @@ void tcf_hash_destroy(struct tc_action *a)
* gen_estimator est_timer() might access p->tcfc_lock
* or bstats, wait a RCU grace period before freeing p
*/
- kfree_rcu(p, tcfc_rcu);
+ call_rcu(&p->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_destroy);
@@ -230,15 +239,16 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
if (est)
gen_kill_estimator(&pc->tcfc_bstats,
&pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ call_rcu(&pc->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_cleanup);
int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
- int size, int bind)
+ int size, int bind, bool cpustats)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+ int err = -ENOMEM;
if (unlikely(!p))
return -ENOMEM;
@@ -246,18 +256,32 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
if (bind)
p->tcfc_bindcnt = 1;
+ if (cpustats) {
+ p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!p->cpu_bstats) {
+err1:
+ kfree(p);
+ return err;
+ }
+ p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!p->cpu_qstats) {
+err2:
+ free_percpu(p->cpu_bstats);
+ goto err1;
+ }
+ }
spin_lock_init(&p->tcfc_lock);
INIT_HLIST_NODE(&p->tcfc_head);
p->tcfc_index = index ? index : tcf_hash_new_index(hinfo);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
if (est) {
- int err = gen_new_estimator(&p->tcfc_bstats, NULL,
- &p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
+ &p->tcfc_rate_est,
+ &p->tcfc_lock, est);
if (err) {
- kfree(p);
- return err;
+ free_percpu(p->cpu_qstats);
+ goto err2;
}
}
@@ -615,10 +639,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 ||
+ if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
- gnet_stats_copy_queue(&d, NULL,
+ gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfc_qstats,
p->tcfc_qstats.qlen) < 0)
goto errout;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1d56903fd4c7..99aa271633e9 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -281,7 +281,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act,
- sizeof(*prog), bind);
+ sizeof(*prog), bind, false);
if (ret < 0)
goto destroy_fp;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 295d14bd6c67..f2b540220ad0 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -108,7 +108,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*ci),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 4cd5cf1aedf8..b07c535ba8e7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -62,7 +62,8 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
parm = nla_data(tb[TCA_CSUM_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 7fffc2272701..5c1b05170736 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -28,14 +28,18 @@
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval)
+ smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+ if (prandom_u32() % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
static int gact_determ(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval)
+ u32 pack = atomic_inc_return(&gact->packets);
+
+ smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+ if (pack % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
@@ -85,7 +89,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+ bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -99,16 +104,19 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
gact = to_gact(a);
- spin_lock_bh(&gact->tcf_lock);
+ ASSERT_RTNL();
gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
if (p_parm) {
gact->tcfg_paction = p_parm->paction;
- gact->tcfg_pval = p_parm->pval;
+ gact->tcfg_pval = max_t(u16, 1, p_parm->pval);
+ /* Make sure tcfg_pval is written before tcfg_ptype
+ * coupled with smp_rmb() in gact_net_rand() & gact_determ()
+ */
+ smp_wmb();
gact->tcfg_ptype = p_parm->ptype;
}
#endif
- spin_unlock_bh(&gact->tcf_lock);
if (ret == ACT_P_CREATED)
tcf_hash_insert(a);
return ret;
@@ -118,23 +126,21 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_gact *gact = a->priv;
- int action = TC_ACT_SHOT;
+ int action = READ_ONCE(gact->tcf_action);
- spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
- if (gact->tcfg_ptype)
- action = gact_rand[gact->tcfg_ptype](gact);
- else
- action = gact->tcf_action;
-#else
- action = gact->tcf_action;
+ {
+ u32 ptype = READ_ONCE(gact->tcfg_ptype);
+
+ if (ptype)
+ action = gact_rand[ptype](gact);
+ }
#endif
- gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
- gact->tcf_bstats.packets++;
+ bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
if (action == TC_ACT_SHOT)
- gact->tcf_qstats.drops++;
- gact->tcf_tm.lastuse = jiffies;
- spin_unlock(&gact->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+
+ tcf_lastuse_update(&gact->tcf_tm);
return action;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index cbc8dd7dd48a..99c9cc1c7af9 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -114,7 +114,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
index = nla_get_u32(tb[TCA_IPT_INDEX]);
if (!tcf_hash_check(index, a, bind) ) {
- ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
+ ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index a42a3b257226..19cd8904efa0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -35,9 +35,11 @@ static LIST_HEAD(mirred_list);
static void tcf_mirred_release(struct tc_action *a, int bind)
{
struct tcf_mirred *m = to_mirred(a);
+ struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1);
+
list_del(&m->tcfm_list);
- if (m->tcfm_dev)
- dev_put(m->tcfm_dev);
+ if (dev)
+ dev_put(dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -93,7 +95,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, a, bind)) {
if (dev == NULL)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*m),
+ bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -105,18 +108,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
m = to_mirred(a);
- spin_lock_bh(&m->tcf_lock);
+ ASSERT_RTNL();
m->tcf_action = parm->action;
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
m->tcfm_ifindex = parm->ifindex;
if (ret != ACT_P_CREATED)
- dev_put(m->tcfm_dev);
+ dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
- m->tcfm_dev = dev;
+ rcu_assign_pointer(m->tcfm_dev, dev);
m->tcfm_ok_push = ok_push;
}
- spin_unlock_bh(&m->tcf_lock);
+
if (ret == ACT_P_CREATED) {
list_add(&m->tcfm_list, &mirred_list);
tcf_hash_insert(a);
@@ -131,20 +134,22 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_mirred *m = a->priv;
struct net_device *dev;
struct sk_buff *skb2;
+ int retval, err;
u32 at;
- int retval, err = 1;
- spin_lock(&m->tcf_lock);
- m->tcf_tm.lastuse = jiffies;
- bstats_update(&m->tcf_bstats, skb);
+ tcf_lastuse_update(&m->tcf_tm);
+
+ bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
- dev = m->tcfm_dev;
- if (!dev) {
- printk_once(KERN_NOTICE "tc mirred: target device is gone\n");
+ rcu_read_lock();
+ retval = READ_ONCE(m->tcf_action);
+ dev = rcu_dereference(m->tcfm_dev);
+ if (unlikely(!dev)) {
+ pr_notice_once("tc mirred: target device is gone\n");
goto out;
}
- if (!(dev->flags & IFF_UP)) {
+ if (unlikely(!(dev->flags & IFF_UP))) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
goto out;
@@ -152,7 +157,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
at = G_TC_AT(skb->tc_verd);
skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 == NULL)
+ if (!skb2)
goto out;
if (!(at & AT_EGRESS)) {
@@ -168,16 +173,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
skb2->dev = dev;
err = dev_queue_xmit(skb2);
-out:
if (err) {
- m->tcf_qstats.overlimits++;
+out:
+ qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
retval = TC_ACT_SHOT;
- else
- retval = m->tcf_action;
- } else
- retval = m->tcf_action;
- spin_unlock(&m->tcf_lock);
+ }
+ rcu_read_unlock();
return retval;
}
@@ -216,14 +218,16 @@ static int mirred_device_event(struct notifier_block *unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct tcf_mirred *m;
+ ASSERT_RTNL();
if (event == NETDEV_UNREGISTER)
list_for_each_entry(m, &mirred_list, tcfm_list) {
- spin_lock_bh(&m->tcf_lock);
- if (m->tcfm_dev == dev) {
+ if (rcu_access_pointer(m->tcfm_dev) == dev) {
dev_put(dev);
- m->tcfm_dev = NULL;
+ /* Note : no rcu grace period necessary, as
+ * net_device are already rcu protected.
+ */
+ RCU_INIT_POINTER(m->tcfm_dev, NULL);
}
- spin_unlock_bh(&m->tcf_lock);
}
return NOTIFY_DONE;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 270a030d5fd0..5be0b3c1c5b0 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -55,7 +55,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
parm = nla_data(tb[TCA_NAT_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 17e6d6669c7f..ce8676ad892f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -57,7 +57,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
p = to_pedit(a);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 6a8d9488613a..d6b708d6afdf 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -103,7 +103,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
defdata = nla_data(tb[TCA_DEF_DATA]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fcfeeaf838be..6751b5f8c046 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -99,7 +99,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index d735ecf0b1a7..796785e0bf96 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -116,7 +116,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
action = parm->v_action;
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*v), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*v),
+ bind, false);
if (ret)
return ret;
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index bdf1c1607b80..c77c872fe8ee 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -60,4 +60,29 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
+#if defined(__x86_64__)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+
+#elif defined(__s390x__)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+
+#endif
#endif
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
index 31620463701a..3f450a8fa1f3 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1_kern.c
@@ -29,7 +29,7 @@ int bpf_prog1(struct pt_regs *ctx)
int len;
/* non-portable! works for the given kernel only */
- skb = (struct sk_buff *) ctx->di;
+ skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
dev = _(skb->dev);
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index dc50f4f2943f..b32367cfbff4 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -27,10 +27,10 @@ int bpf_prog2(struct pt_regs *ctx)
long init_val = 1;
long *value;
- /* x64 specific: read ip of kfree_skb caller.
+ /* x64/s390x specific: read ip of kfree_skb caller.
* non-portable version of __builtin_return_address(0)
*/
- bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp);
+ bpf_probe_read(&loc, sizeof(loc), (void *)PT_REGS_RET(ctx));
value = bpf_map_lookup_elem(&my_map, &loc);
if (value)
@@ -79,7 +79,7 @@ struct bpf_map_def SEC("maps") my_hist_map = {
SEC("kprobe/sys_write")
int bpf_prog3(struct pt_regs *ctx)
{
- long write_size = ctx->dx; /* arg3 */
+ long write_size = PT_REGS_PARM3(ctx);
long init_val = 1;
long *value;
struct hist_key key = {};
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index 255ff2792366..bf337fbb0947 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -23,7 +23,7 @@ struct bpf_map_def SEC("maps") my_map = {
SEC("kprobe/blk_mq_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
- long rq = ctx->di;
+ long rq = PT_REGS_PARM1(ctx);
u64 val = bpf_ktime_get_ns();
bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
@@ -51,7 +51,7 @@ struct bpf_map_def SEC("maps") lat_map = {
SEC("kprobe/blk_update_request")
int bpf_prog2(struct pt_regs *ctx)
{
- long rq = ctx->di;
+ long rq = PT_REGS_PARM1(ctx);
u64 *value, l, base;
u32 index;
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c
index 126b80512228..ac4671420cf1 100644
--- a/samples/bpf/tracex4_kern.c
+++ b/samples/bpf/tracex4_kern.c
@@ -27,7 +27,7 @@ struct bpf_map_def SEC("maps") my_map = {
SEC("kprobe/kmem_cache_free")
int bpf_prog1(struct pt_regs *ctx)
{
- long ptr = ctx->si;
+ long ptr = PT_REGS_PARM2(ctx);
bpf_map_delete_elem(&my_map, &ptr);
return 0;
@@ -36,11 +36,11 @@ int bpf_prog1(struct pt_regs *ctx)
SEC("kretprobe/kmem_cache_alloc_node")
int bpf_prog2(struct pt_regs *ctx)
{
- long ptr = ctx->ax;
+ long ptr = PT_REGS_RC(ctx);
long ip = 0;
/* get ip address of kmem_cache_alloc_node() caller */
- bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip)));
+ bpf_probe_read(&ip, sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip)));
struct pair v = {
.val = bpf_ktime_get_ns(),
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index b71fe07a7a7a..b3f4295bf288 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -24,7 +24,7 @@ int bpf_prog1(struct pt_regs *ctx)
{
struct seccomp_data sd = {};
- bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
/* dispatch into next BPF program depending on syscall number */
bpf_tail_call(ctx, &progs, sd.nr);
@@ -42,7 +42,7 @@ PROG(__NR_write)(struct pt_regs *ctx)
{
struct seccomp_data sd = {};
- bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@@ -55,7 +55,7 @@ PROG(__NR_read)(struct pt_regs *ctx)
{
struct seccomp_data sd = {};
- bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),