diff options
105 files changed, 1042 insertions, 698 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5fae7704daab..f63aeefd2c24 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1435,6 +1435,11 @@ mtu - INTEGER Default Maximum Transfer Unit Default: 1280 (IPv6 required minimum) +ip_nonlocal_bind - BOOLEAN + If set, allows processes to bind() to non-local IPv6 addresses, + which can be quite useful - but may break some applications. + Default: 0 + router_probe_interval - INTEGER Minimum interval (in seconds) between Router Probing described in RFC4191. diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 7ad0a4d8e475..4c483d937481 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -46,13 +46,13 @@ config NET_DSA_MV88E6171 ethernet switches chips. config NET_DSA_MV88E6352 - tristate "Marvell 88E6172/88E6176/88E6352 ethernet switch chip support" + tristate "Marvell 88E6172/6176/6320/6321/6352 ethernet switch chip support" depends on NET_DSA select NET_DSA_MV88E6XXX select NET_DSA_TAG_EDSA ---help--- - This enables support for the Marvell 88E6172, 88E6176 and 88E6352 - ethernet switch chips. + This enables support for the Marvell 88E6172, 88E6176, 88E6320, + 88E6321 and 88E6352 ethernet switch chips. config NET_DSA_BCM_SF2 tristate "Broadcom Starfighter 2 Ethernet switch support" diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index 632815c10a40..cfece5ae9d5f 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -36,6 +36,18 @@ static char *mv88e6352_probe(struct device *host_dev, int sw_addr) return "Marvell 88E6172"; if ((ret & 0xfff0) == PORT_SWITCH_ID_6176) return "Marvell 88E6176"; + if (ret == PORT_SWITCH_ID_6320_A1) + return "Marvell 88E6320 (A1)"; + if (ret == PORT_SWITCH_ID_6320_A2) + return "Marvell 88e6320 (A2)"; + if ((ret & 0xfff0) == PORT_SWITCH_ID_6320) + return "Marvell 88E6320"; + if (ret == PORT_SWITCH_ID_6321_A1) + return "Marvell 88E6321 (A1)"; + if (ret == PORT_SWITCH_ID_6321_A2) + return "Marvell 88e6321 (A2)"; + if ((ret & 0xfff0) == PORT_SWITCH_ID_6321) + return "Marvell 88E6321"; if (ret == PORT_SWITCH_ID_6352_A0) return "Marvell 88E6352 (A0)"; if (ret == PORT_SWITCH_ID_6352_A1) @@ -84,11 +96,12 @@ static int mv88e6352_setup_global(struct dsa_switch *ds) static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp) { + int phy = mv88e6xxx_6320_family(ds) ? 3 : 0; int ret; *temp = 0; - ret = mv88e6xxx_phy_page_read(ds, 0, 6, 27); + ret = mv88e6xxx_phy_page_read(ds, phy, 6, 27); if (ret < 0) return ret; @@ -99,11 +112,12 @@ static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp) static int mv88e6352_get_temp_limit(struct dsa_switch *ds, int *temp) { + int phy = mv88e6xxx_6320_family(ds) ? 3 : 0; int ret; *temp = 0; - ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26); + ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26); if (ret < 0) return ret; @@ -114,23 +128,25 @@ static int mv88e6352_get_temp_limit(struct dsa_switch *ds, int *temp) static int mv88e6352_set_temp_limit(struct dsa_switch *ds, int temp) { + int phy = mv88e6xxx_6320_family(ds) ? 3 : 0; int ret; - ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26); + ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26); if (ret < 0) return ret; temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f); - return mv88e6xxx_phy_page_write(ds, 0, 6, 26, + return mv88e6xxx_phy_page_write(ds, phy, 6, 26, (ret & 0xe0ff) | (temp << 8)); } static int mv88e6352_get_temp_alarm(struct dsa_switch *ds, bool *alarm) { + int phy = mv88e6xxx_6320_family(ds) ? 3 : 0; int ret; *alarm = false; - ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26); + ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26); if (ret < 0) return ret; @@ -394,5 +410,8 @@ struct dsa_switch_driver mv88e6352_switch_driver = { .fdb_getnext = mv88e6xxx_port_fdb_getnext, }; -MODULE_ALIAS("platform:mv88e6352"); MODULE_ALIAS("platform:mv88e6172"); +MODULE_ALIAS("platform:mv88e6176"); +MODULE_ALIAS("platform:mv88e6320"); +MODULE_ALIAS("platform:mv88e6321"); +MODULE_ALIAS("platform:mv88e6352"); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index fd8547c2b79d..f394e4d4d9e0 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -517,6 +517,18 @@ static bool mv88e6xxx_6185_family(struct dsa_switch *ds) return false; } +bool mv88e6xxx_6320_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6320: + case PORT_SWITCH_ID_6321: + return true; + } + return false; +} + static bool mv88e6xxx_6351_family(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -565,7 +577,7 @@ static int _mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) { int ret; - if (mv88e6xxx_6352_family(ds)) + if (mv88e6xxx_6320_family(ds) || mv88e6xxx_6352_family(ds)) port = (port + 1) << 5; /* Snapshot the hardware statistics counters for this port. */ @@ -1377,7 +1389,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) || - mv88e6xxx_6065_family(ds)) { + mv88e6xxx_6065_family(ds) || mv88e6xxx_6320_family(ds)) { /* MAC Forcing register: don't force link, speed, * duplex or flow control state to any particular * values on physical ports, but force the CPU port @@ -1423,7 +1435,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || - mv88e6xxx_6185_family(ds)) + mv88e6xxx_6185_family(ds) || mv88e6xxx_6320_family(ds)) reg = PORT_CONTROL_IGMP_MLD_SNOOP | PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP | PORT_CONTROL_STATE_FORWARDING; @@ -1431,7 +1443,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) reg |= PORT_CONTROL_DSA_TAG; if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6320_family(ds)) { if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA; else @@ -1441,14 +1454,15 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || - mv88e6xxx_6185_family(ds)) { + mv88e6xxx_6185_family(ds) || mv88e6xxx_6320_family(ds)) { if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) reg |= PORT_CONTROL_EGRESS_ADD_TAG; } } if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds)) { + mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || + mv88e6xxx_6320_family(ds)) { if (ds->dsa_port_mask & (1 << port)) reg |= PORT_CONTROL_FRAME_MODE_DSA; if (port == dsa_upstream_port(ds)) @@ -1473,11 +1487,11 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg = 0; if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6095_family(ds)) + mv88e6xxx_6095_family(ds) || mv88e6xxx_6320_family(ds)) reg = PORT_CONTROL_2_MAP_DA; if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds)) + mv88e6xxx_6165_family(ds) || mv88e6xxx_6320_family(ds)) reg |= PORT_CONTROL_2_JUMBO_10240; if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) { @@ -1514,7 +1528,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) goto abort; if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6320_family(ds)) { /* Do not limit the period of time that this port can * be paused for by the remote end or the period of * time that this port can pause the remote end. @@ -1564,7 +1579,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) { + mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) || + mv88e6xxx_6320_family(ds)) { /* Rate Control: disable ingress rate limiting. */ ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_RATE_CONTROL, 0x0001); @@ -1976,7 +1992,8 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT)); if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6320_family(ds)) { /* Send all frames with destination addresses matching * 01:80:c2:00:00:2x to the CPU port. */ @@ -1995,7 +2012,8 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) { + mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) || + mv88e6xxx_6320_family(ds)) { /* Disable ingress rate limiting by resetting all * ingress rate limit registers to their initial * state. diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index a650b2656de9..64786cb89a93 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -89,7 +89,12 @@ #define PORT_SWITCH_ID_6182 0x1a60 #define PORT_SWITCH_ID_6185 0x1a70 #define PORT_SWITCH_ID_6240 0x2400 -#define PORT_SWITCH_ID_6320 0x1250 +#define PORT_SWITCH_ID_6320 0x1150 +#define PORT_SWITCH_ID_6320_A1 0x1151 +#define PORT_SWITCH_ID_6320_A2 0x1152 +#define PORT_SWITCH_ID_6321 0x3100 +#define PORT_SWITCH_ID_6321_A1 0x3101 +#define PORT_SWITCH_ID_6321_A2 0x3102 #define PORT_SWITCH_ID_6350 0x3710 #define PORT_SWITCH_ID_6351 0x3750 #define PORT_SWITCH_ID_6352 0x3520 @@ -410,6 +415,7 @@ int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg); int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, int reg, int val); +bool mv88e6xxx_6320_family(struct dsa_switch *ds); extern struct dsa_switch_driver mv88e6131_switch_driver; extern struct dsa_switch_driver mv88e6123_61_65_switch_driver; extern struct dsa_switch_driver mv88e6352_switch_driver; diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index caeb39561567..a4e3f8655cb8 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2741,8 +2741,7 @@ static const struct macb_config emac_config = { static const struct macb_config zynqmp_config = { - .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | - MACB_CAPS_JUMBO, + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, @@ -2750,8 +2749,7 @@ static const struct macb_config zynqmp_config = { }; static const struct macb_config zynq_config = { - .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | - MACB_CAPS_NO_GIGABIT_HALF, + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index a11485fbb33f..b135d05c9984 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -151,6 +151,45 @@ static int cim_la_show_3in1(struct seq_file *seq, void *v, int idx) return 0; } +static int cim_la_show_t6(struct seq_file *seq, void *v, int idx) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, "Status Inst Data PC LS0Stat " + "LS0Addr LS0Data LS1Stat LS1Addr LS1Data\n"); + } else { + const u32 *p = v; + + seq_printf(seq, " %02x %04x%04x %04x%04x %04x%04x %08x %08x %08x %08x %08x %08x\n", + (p[9] >> 16) & 0xff, /* Status */ + p[9] & 0xffff, p[8] >> 16, /* Inst */ + p[8] & 0xffff, p[7] >> 16, /* Data */ + p[7] & 0xffff, p[6] >> 16, /* PC */ + p[2], p[1], p[0], /* LS0 Stat, Addr and Data */ + p[5], p[4], p[3]); /* LS1 Stat, Addr and Data */ + } + return 0; +} + +static int cim_la_show_pc_t6(struct seq_file *seq, void *v, int idx) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, "Status Inst Data PC\n"); + } else { + const u32 *p = v; + + seq_printf(seq, " %02x %08x %08x %08x\n", + p[3] & 0xff, p[2], p[1], p[0]); + seq_printf(seq, " %02x %02x%06x %02x%06x %02x%06x\n", + (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8, + p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8); + seq_printf(seq, " %02x %04x%04x %04x%04x %04x%04x\n", + (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, + p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, + p[6] >> 16); + } + return 0; +} + static int cim_la_open(struct inode *inode, struct file *file) { int ret; @@ -162,9 +201,18 @@ static int cim_la_open(struct inode *inode, struct file *file) if (ret) return ret; - p = seq_open_tab(file, adap->params.cim_la_size / 8, 8 * sizeof(u32), 1, - cfg & UPDBGLACAPTPCONLY_F ? - cim_la_show_3in1 : cim_la_show); + if (is_t6(adap->params.chip)) { + /* +1 to account for integer division of CIMLA_SIZE/10 */ + p = seq_open_tab(file, (adap->params.cim_la_size / 10) + 1, + 10 * sizeof(u32), 1, + cfg & UPDBGLACAPTPCONLY_F ? + cim_la_show_pc_t6 : cim_la_show_t6); + } else { + p = seq_open_tab(file, adap->params.cim_la_size / 8, + 8 * sizeof(u32), 1, + cfg & UPDBGLACAPTPCONLY_F ? cim_la_show_3in1 : + cim_la_show); + } if (!p) return -ENOMEM; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 351f3b1bf800..d582e175dfb6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4757,7 +4757,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) */ cfg_queues(adapter); - adapter->l2t = t4_init_l2t(); + adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end); if (!adapter->l2t) { /* We tolerate a lack of L2T, giving up some functionality */ dev_warn(&pdev->dev, "could not allocate L2T, continuing\n"); diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 252efc29321f..ac27898c6ab0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -51,24 +51,17 @@ #define VLAN_NONE 0xfff /* identifies sync vs async L2T_WRITE_REQs */ -#define F_SYNC_WR (1 << 12) - -enum { - L2T_STATE_VALID, /* entry is up to date */ - L2T_STATE_STALE, /* entry may be used but needs revalidation */ - L2T_STATE_RESOLVING, /* entry needs address resolution */ - L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */ - - /* when state is one of the below the entry is not hashed */ - L2T_STATE_SWITCHING, /* entry is being used by a switching filter */ - L2T_STATE_UNUSED /* entry not in use */ -}; +#define SYNC_WR_S 12 +#define SYNC_WR_V(x) ((x) << SYNC_WR_S) +#define SYNC_WR_F SYNC_WR_V(1) struct l2t_data { + unsigned int l2t_start; /* start index of our piece of the L2T */ + unsigned int l2t_size; /* number of entries in l2tab */ rwlock_t lock; atomic_t nfree; /* number of free entries */ struct l2t_entry *rover; /* starting point for next allocation */ - struct l2t_entry l2tab[L2T_SIZE]; + struct l2t_entry l2tab[0]; /* MUST BE LAST */ }; static inline unsigned int vlan_prio(const struct l2t_entry *e) @@ -85,29 +78,36 @@ static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e) /* * To avoid having to check address families we do not allow v4 and v6 * neighbors to be on the same hash chain. We keep v4 entries in the first - * half of available hash buckets and v6 in the second. + * half of available hash buckets and v6 in the second. We need at least two + * entries in our L2T for this scheme to work. */ enum { - L2T_SZ_HALF = L2T_SIZE / 2, - L2T_HASH_MASK = L2T_SZ_HALF - 1 + L2T_MIN_HASH_BUCKETS = 2, }; -static inline unsigned int arp_hash(const u32 *key, int ifindex) +static inline unsigned int arp_hash(struct l2t_data *d, const u32 *key, + int ifindex) { - return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK; + unsigned int l2t_size_half = d->l2t_size / 2; + + return jhash_2words(*key, ifindex, 0) % l2t_size_half; } -static inline unsigned int ipv6_hash(const u32 *key, int ifindex) +static inline unsigned int ipv6_hash(struct l2t_data *d, const u32 *key, + int ifindex) { + unsigned int l2t_size_half = d->l2t_size / 2; u32 xor = key[0] ^ key[1] ^ key[2] ^ key[3]; - return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK); + return (l2t_size_half + + (jhash_2words(xor, ifindex, 0) % l2t_size_half)); } -static unsigned int addr_hash(const u32 *addr, int addr_len, int ifindex) +static unsigned int addr_hash(struct l2t_data *d, const u32 *addr, + int addr_len, int ifindex) { - return addr_len == 4 ? arp_hash(addr, ifindex) : - ipv6_hash(addr, ifindex); + return addr_len == 4 ? arp_hash(d, addr, ifindex) : + ipv6_hash(d, addr, ifindex); } /* @@ -139,6 +139,8 @@ static void neigh_replace(struct l2t_entry *e, struct neighbour *n) */ static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync) { + struct l2t_data *d = adap->l2t; + unsigned int l2t_idx = e->idx + d->l2t_start; struct sk_buff *skb; struct cpl_l2t_write_req *req; @@ -150,10 +152,10 @@ static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync) INIT_TP_WR(req, 0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, - e->idx | (sync ? F_SYNC_WR : 0) | + l2t_idx | (sync ? SYNC_WR_F : 0) | TID_QID_V(adap->sge.fw_evtq.abs_id))); req->params = htons(L2T_W_PORT_V(e->lport) | L2T_W_NOREPLY_V(!sync)); - req->l2t_idx = htons(e->idx); + req->l2t_idx = htons(l2t_idx); req->vlan = htons(e->vlan); if (e->neigh && !(e->neigh->dev->flags & IFF_LOOPBACK)) memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac)); @@ -190,18 +192,19 @@ static void send_pending(struct adapter *adap, struct l2t_entry *e) */ void do_l2t_write_rpl(struct adapter *adap, const struct cpl_l2t_write_rpl *rpl) { + struct l2t_data *d = adap->l2t; unsigned int tid = GET_TID(rpl); - unsigned int idx = tid & (L2T_SIZE - 1); + unsigned int l2t_idx = tid % L2T_SIZE; if (unlikely(rpl->status != CPL_ERR_NONE)) { dev_err(adap->pdev_dev, "Unexpected L2T_WRITE_RPL status %u for entry %u\n", - rpl->status, idx); + rpl->status, l2t_idx); return; } - if (tid & F_SYNC_WR) { - struct l2t_entry *e = &adap->l2t->l2tab[idx]; + if (tid & SYNC_WR_F) { + struct l2t_entry *e = &d->l2tab[l2t_idx - d->l2t_start]; spin_lock(&e->lock); if (e->state != L2T_STATE_SWITCHING) { @@ -276,7 +279,7 @@ static struct l2t_entry *alloc_l2e(struct l2t_data *d) return NULL; /* there's definitely a free entry */ - for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e) + for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e) if (atomic_read(&e->refcnt) == 0) goto found; @@ -368,7 +371,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *)neigh->primary_key; int ifidx = neigh->dev->ifindex; - int hash = addr_hash(addr, addr_len, ifidx); + int hash = addr_hash(d, addr, addr_len, ifidx); if (neigh->dev->flags & IFF_LOOPBACK) lport = netdev2pinfo(physdev)->tx_chan + 4; @@ -481,7 +484,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *) neigh->primary_key; int ifidx = neigh->dev->ifindex; - int hash = addr_hash(addr, addr_len, ifidx); + int hash = addr_hash(d, addr, addr_len, ifidx); read_lock_bh(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) @@ -554,20 +557,30 @@ int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, return write_l2e(adap, e, 0); } -struct l2t_data *t4_init_l2t(void) +struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end) { + unsigned int l2t_size; int i; struct l2t_data *d; - d = t4_alloc_mem(sizeof(*d)); + if (l2t_start >= l2t_end || l2t_end >= L2T_SIZE) + return NULL; + l2t_size = l2t_end - l2t_start + 1; + if (l2t_size < L2T_MIN_HASH_BUCKETS) + return NULL; + + d = t4_alloc_mem(sizeof(*d) + l2t_size * sizeof(struct l2t_entry)); if (!d) return NULL; + d->l2t_start = l2t_start; + d->l2t_size = l2t_size; + d->rover = d->l2tab; - atomic_set(&d->nfree, L2T_SIZE); + atomic_set(&d->nfree, l2t_size); rwlock_init(&d->lock); - for (i = 0; i < L2T_SIZE; ++i) { + for (i = 0; i < d->l2t_size; ++i) { d->l2tab[i].idx = i; d->l2tab[i].state = L2T_STATE_UNUSED; spin_lock_init(&d->l2tab[i].lock); @@ -578,9 +591,9 @@ struct l2t_data *t4_init_l2t(void) static inline void *l2t_get_idx(struct seq_file *seq, loff_t pos) { - struct l2t_entry *l2tab = seq->private; + struct l2t_data *d = seq->private; - return pos >= L2T_SIZE ? NULL : &l2tab[pos]; + return pos >= d->l2t_size ? NULL : &d->l2tab[pos]; } static void *l2t_seq_start(struct seq_file *seq, loff_t *pos) @@ -620,6 +633,7 @@ static int l2t_seq_show(struct seq_file *seq, void *v) "Ethernet address VLAN/P LP State Users Port\n"); else { char ip[60]; + struct l2t_data *d = seq->private; struct l2t_entry *e = v; spin_lock_bh(&e->lock); @@ -628,7 +642,7 @@ static int l2t_seq_show(struct seq_file *seq, void *v) else sprintf(ip, e->v6 ? "%pI6c" : "%pI4", e->addr); seq_printf(seq, "%4u %-25s %17pM %4d %u %2u %c %5u %s\n", - e->idx, ip, e->dmac, + e->idx + d->l2t_start, ip, e->dmac, e->vlan & VLAN_VID_MASK, vlan_prio(e), e->lport, l2e_state(e), atomic_read(&e->refcnt), e->neigh ? e->neigh->dev->name : ""); @@ -652,7 +666,7 @@ static int l2t_seq_open(struct inode *inode, struct file *file) struct adapter *adap = inode->i_private; struct seq_file *seq = file->private_data; - seq->private = adap->l2t->l2tab; + seq->private = adap->l2t; } return rc; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h index a30126ce90cb..b38dc526aad5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h @@ -39,6 +39,20 @@ #include <linux/if_ether.h> #include <linux/atomic.h> +enum { L2T_SIZE = 4096 }; /* # of L2T entries */ + +enum { + L2T_STATE_VALID, /* entry is up to date */ + L2T_STATE_STALE, /* entry may be used but needs revalidation */ + L2T_STATE_RESOLVING, /* entry needs address resolution */ + L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */ + L2T_STATE_NOARP, /* Netdev down or removed*/ + + /* when state is one of the below the entry is not hashed */ + L2T_STATE_SWITCHING, /* entry is being used by a switching filter */ + L2T_STATE_UNUSED /* entry not in use */ +}; + struct adapter; struct l2t_data; struct neighbour; @@ -56,7 +70,7 @@ struct cpl_l2t_write_rpl; */ struct l2t_entry { u16 state; /* entry state */ - u16 idx; /* entry index */ + u16 idx; /* entry index within in-memory table */ u32 addr[4]; /* next hop IP or IPv6 address */ int ifindex; /* neighbor's net_device's ifindex */ struct neighbour *neigh; /* associated neighbour */ @@ -104,7 +118,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh); struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d); int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, u8 port, u8 *eth_addr); -struct l2t_data *t4_init_l2t(void); +struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end); void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl); extern const struct file_operations t4_l2t_fops; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 2b52aae7ec86..1e6597dc8736 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -1345,9 +1345,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x5a80, 0x5a9c, 0x5b94, 0x5bfc, 0x5c10, 0x5ec0, - 0x5ec8, 0x5ec8, + 0x5ec8, 0x5ecc, 0x6000, 0x6040, - 0x6058, 0x6154, + 0x6058, 0x615c, 0x7700, 0x7798, 0x77c0, 0x7880, 0x78cc, 0x78fc, @@ -1371,20 +1371,22 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9f00, 0x9f6c, 0x9f80, 0xa020, 0xd004, 0xd03c, + 0xd100, 0xd118, + 0xd200, 0xd31c, 0xdfc0, 0xdfe0, 0xe000, 0xf008, 0x11000, 0x11014, 0x11048, 0x11110, 0x11118, 0x1117c, - 0x11190, 0x11260, + 0x11190, 0x11264, 0x11300, 0x1130c, - 0x12000, 0x1205c, + 0x12000, 0x1206c, 0x19040, 0x1906c, 0x19078, 0x19080, 0x1908c, 0x19124, 0x19150, 0x191b0, 0x191d0, 0x191e8, - 0x19238, 0x192b8, + 0x19238, 0x192bc, 0x193f8, 0x19474, 0x19490, 0x194cc, 0x194f0, 0x194f8, @@ -1466,7 +1468,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x30200, 0x30318, 0x30400, 0x3052c, 0x30540, 0x3061c, - 0x30800, 0x3088c, + 0x30800, 0x30890, 0x308c0, 0x30908, 0x30910, 0x309b8, 0x30a00, 0x30a04, @@ -1544,7 +1546,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x34200, 0x34318, 0x34400, 0x3452c, 0x34540, 0x3461c, - 0x34800, 0x3488c, + 0x34800, 0x34890, 0x348c0, 0x34908, 0x34910, 0x349b8, 0x34a00, 0x34a04, @@ -3924,43 +3926,25 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, */ void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st) { - /* T6 and later has 2 channels */ - if (adap->params.arch.nchan == NCHAN) { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->mac_in_errs, 12, TP_MIB_MAC_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tnl_cong_drops, 8, - TP_MIB_TNL_CNG_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tnl_tx_drops, 4, - TP_MIB_TNL_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->ofld_vlan_drops, 4, - TP_MIB_OFD_VLN_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tcp6_in_errs, 4, - TP_MIB_TCP_V6IN_ERR_0_A); - } else { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->mac_in_errs, 2, TP_MIB_MAC_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->hdr_in_errs, 2, TP_MIB_HDR_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tcp_in_errs, 2, TP_MIB_TCP_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tnl_cong_drops, 2, - TP_MIB_TNL_CNG_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->ofld_chan_drops, 2, - TP_MIB_OFD_CHN_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tnl_tx_drops, 2, TP_MIB_TNL_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->ofld_vlan_drops, 2, - TP_MIB_OFD_VLN_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tcp6_in_errs, 2, TP_MIB_TCP_V6IN_ERR_0_A); - } + int nchan = adap->params.arch.nchan; + + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, + st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, + st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, + st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, + st->tnl_cong_drops, nchan, TP_MIB_TNL_CNG_DROP_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, + st->ofld_chan_drops, nchan, TP_MIB_OFD_CHN_DROP_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, + st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, + st->ofld_vlan_drops, nchan, TP_MIB_OFD_VLN_DROP_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, + st->tcp6_in_errs, nchan, TP_MIB_TCP_V6IN_ERR_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A); } @@ -3974,16 +3958,13 @@ void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st) */ void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st) { - /* T6 and later has 2 channels */ - if (adap->params.arch.nchan == NCHAN) { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req, - 8, TP_MIB_CPL_IN_REQ_0_A); - } else { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req, - 2, TP_MIB_CPL_IN_REQ_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp, - 2, TP_MIB_CPL_OUT_RSP_0_A); - } + int nchan = adap->params.arch.nchan; + + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req, + nchan, TP_MIB_CPL_IN_REQ_0_A); + t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp, + nchan, TP_MIB_CPL_OUT_RSP_0_A); + } /** diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index c8488f430d19..640369df8b3a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -47,7 +47,6 @@ enum { TCB_SIZE = 128, /* TCB size */ NMTUS = 16, /* size of MTU table */ NCCTRL_WIN = 32, /* # of congestion control windows */ - L2T_SIZE = 4096, /* # of L2T entries */ PM_NSTATS = 5, /* # of PM stats */ MBOX_LEN = 64, /* mailbox size in bytes */ TRACE_LEN = 112, /* length of trace data and mask */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index d7ca106927b0..8353a6cbfcc2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -142,6 +142,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5013), /* T580-chr */ CH_PCI_ID_TABLE_FENTRY(0x5014), /* T580-so */ CH_PCI_ID_TABLE_FENTRY(0x5015), /* T502-bt */ + CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */ + CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */ CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */ CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */ CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */ @@ -155,6 +157,22 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5090), /* Custom T540-CR */ CH_PCI_ID_TABLE_FENTRY(0x5091), /* Custom T522-CR */ CH_PCI_ID_TABLE_FENTRY(0x5092), /* Custom T520-CR */ + + /* T6 adapters: + */ + CH_PCI_ID_TABLE_FENTRY(0x6001), + CH_PCI_ID_TABLE_FENTRY(0x6002), + CH_PCI_ID_TABLE_FENTRY(0x6003), + CH_PCI_ID_TABLE_FENTRY(0x6004), + CH_PCI_ID_TABLE_FENTRY(0x6005), + CH_PCI_ID_TABLE_FENTRY(0x6006), + CH_PCI_ID_TABLE_FENTRY(0x6007), + CH_PCI_ID_TABLE_FENTRY(0x6009), + CH_PCI_ID_TABLE_FENTRY(0x600d), + CH_PCI_ID_TABLE_FENTRY(0x6010), + CH_PCI_ID_TABLE_FENTRY(0x6011), + CH_PCI_ID_TABLE_FENTRY(0x6014), + CH_PCI_ID_TABLE_FENTRY(0x6015), CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index ad53e5ad2acd..1d5e77a566e1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1898,7 +1898,10 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) rspq->unhandled_irqs++; val = CIDXINC_V(work_done) | SEINTARM_V(intr_params); - if (is_t4(rspq->adapter->params.chip)) { + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(!rspq->bar2_addr)) { t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, val | INGRESSQID_V((u32)rspq->cntxt_id)); @@ -1998,10 +2001,13 @@ static unsigned int process_intrq(struct adapter *adapter) } val = CIDXINC_V(work_done) | SEINTARM_V(intrq->intr_params); - if (is_t4(adapter->params.chip)) + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(!intrq->bar2_addr)) { t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, val | INGRESSQID_V(intrq->cntxt_id)); - else { + } else { writel(val | INGRESSQID_V(intrq->bar2_qid), intrq->bar2_addr + SGE_UDB_GTS); wmb(); diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c index d1017509b08a..f7b42483921c 100644 --- a/drivers/net/ethernet/ec_bhf.c +++ b/drivers/net/ethernet/ec_bhf.c @@ -604,19 +604,7 @@ static struct pci_driver pci_driver = { .probe = ec_bhf_probe, .remove = ec_bhf_remove, }; - -static int __init ec_bhf_init(void) -{ - return pci_register_driver(&pci_driver); -} - -static void __exit ec_bhf_exit(void) -{ - pci_unregister_driver(&pci_driver); -} - -module_init(ec_bhf_init); -module_exit(ec_bhf_exit); +module_pci_driver(pci_driver); module_param(polling_frequency, long, S_IRUGO); MODULE_PARM_DESC(polling_frequency, "Polling timer frequency in ns"); diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 8d12b41b3b19..cb5777bb7429 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -37,7 +37,7 @@ #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "10.6.0.2" +#define DRV_VER "10.6.0.3" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 9eac3227d2ca..ecad46f79653 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -88,19 +88,21 @@ static inline void *embedded_payload(struct be_mcc_wrb *wrb) return wrb->payload.embedded_payload; } -static void be_mcc_notify(struct be_adapter *adapter) +static int be_mcc_notify(struct be_adapter *adapter) { struct be_queue_info *mccq = &adapter->mcc_obj.q; u32 val = 0; if (be_check_error(adapter, BE_ERROR_ANY)) - return; + return -EIO; val |= mccq->id & DB_MCCQ_RING_ID_MASK; val |= 1 << DB_MCCQ_NUM_POSTED_SHIFT; wmb(); iowrite32(val, adapter->db + DB_MCCQ_OFFSET); + + return 0; } /* To check if valid bit is set, check the entire word as we don't know @@ -170,6 +172,12 @@ static void be_async_cmd_process(struct be_adapter *adapter, return; } + if (opcode == OPCODE_LOWLEVEL_SET_LOOPBACK_MODE && + subsystem == CMD_SUBSYSTEM_LOWLEVEL) { + complete(&adapter->et_cmd_compl); + return; + } + if ((opcode == OPCODE_COMMON_WRITE_FLASHROM || opcode == OPCODE_COMMON_WRITE_OBJECT) && subsystem == CMD_SUBSYSTEM_COMMON) { @@ -541,7 +549,9 @@ static int be_mcc_notify_wait(struct be_adapter *adapter) resp = be_decode_resp_hdr(wrb->tag0, wrb->tag1); - be_mcc_notify(adapter); + status = be_mcc_notify(adapter); + if (status) + goto out; status = be_mcc_wait_compl(adapter); if (status == -EIO) @@ -1547,7 +1557,10 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) else hdr->version = 2; - be_mcc_notify(adapter); + status = be_mcc_notify(adapter); + if (status) + goto err; + adapter->stats_cmd_sent = true; err: @@ -1583,7 +1596,10 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, req->cmd_params.params.pport_num = cpu_to_le16(adapter->hba_port_num); req->cmd_params.params.reset_stats = 0; - be_mcc_notify(adapter); + status = be_mcc_notify(adapter); + if (status) + goto err; + adapter->stats_cmd_sent = true; err: @@ -1687,8 +1703,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES, sizeof(*req), wrb, NULL); - be_mcc_notify(adapter); - + status = be_mcc_notify(adapter); err: spin_unlock_bh(&adapter->mcc_lock); return status; @@ -1860,7 +1875,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, cpu_to_le32(set_eqd[i].delay_multiplier); } - be_mcc_notify(adapter); + status = be_mcc_notify(adapter); err: spin_unlock_bh(&adapter->mcc_lock); return status; @@ -1953,7 +1968,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN); } - status = be_mcc_notify_wait(adapter); + status = be_mcc_notify(adapter); err: spin_unlock_bh(&adapter->mcc_lock); return status; @@ -2320,7 +2335,10 @@ int lancer_cmd_write_object(struct be_adapter *adapter, struct be_dma_mem *cmd, req->addr_high = cpu_to_le32(upper_32_bits(cmd->dma + sizeof(struct lancer_cmd_req_write_object))); - be_mcc_notify(adapter); + status = be_mcc_notify(adapter); + if (status) + goto err_unlock; + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, @@ -2491,7 +2509,10 @@ int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_dma_mem *cmd, req->params.op_code = cpu_to_le32(flash_opcode); req->params.data_buf_size = cpu_to_le32(buf_size); - be_mcc_notify(adapter); + status = be_mcc_notify(adapter); + if (status) + goto err_unlock; + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, @@ -2585,7 +2606,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, wrb = wrb_from_mccq(adapter); if (!wrb) { status = -EBUSY; - goto err; + goto err_unlock; } req = embedded_payload(wrb); @@ -2599,8 +2620,19 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, req->loopback_type = loopback_type; req->loopback_state = enable; - status = be_mcc_notify_wait(adapter); -err: + status = be_mcc_notify(adapter); + if (status) + goto err_unlock; + + spin_unlock_bh(&adapter->mcc_lock); + + if (!wait_for_completion_timeout(&adapter->et_cmd_compl, + msecs_to_jiffies(SET_LB_MODE_TIMEOUT))) + status = -ETIMEDOUT; + + return status; + +err_unlock: spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2636,7 +2668,9 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, req->num_pkts = cpu_to_le32(num_pkts); req->loopback_type = cpu_to_le32(loopback_type); - be_mcc_notify(adapter); + status = be_mcc_notify(adapter); + if (status) + goto err; spin_unlock_bh(&adapter->mcc_lock); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 2716e6f30d9a..a4479f7488d3 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1495,6 +1495,8 @@ struct be_cmd_resp_acpi_wol_magic_config_v1 { #define BE_PME_D3COLD_CAP 0x80 /********************** LoopBack test *********************/ +#define SET_LB_MODE_TIMEOUT 12000 + struct be_cmd_req_loopback_test { struct be_cmd_req_hdr hdr; u32 loopback_type; @@ -1758,6 +1760,7 @@ struct be_cmd_req_set_mac_list { /*********************** HSW Config ***********************/ #define PORT_FWD_TYPE_VEPA 0x3 #define PORT_FWD_TYPE_VEB 0x2 +#define PORT_FWD_TYPE_PASSTHRU 0x1 #define ENABLE_MAC_SPOOFCHK 0x2 #define DISABLE_MAC_SPOOFCHK 0x3 diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index b2476dbfd103..d20ff054c1f7 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -847,10 +847,21 @@ err: static u64 be_loopback_test(struct be_adapter *adapter, u8 loopback_type, u64 *status) { - be_cmd_set_loopback(adapter, adapter->hba_port_num, loopback_type, 1); + int ret; + + ret = be_cmd_set_loopback(adapter, adapter->hba_port_num, + loopback_type, 1); + if (ret) + return ret; + *status = be_cmd_loopback_test(adapter, adapter->hba_port_num, loopback_type, 1500, 2, 0xabc); - be_cmd_set_loopback(adapter, adapter->hba_port_num, BE_NO_LOOPBACK, 1); + + ret = be_cmd_set_loopback(adapter, adapter->hba_port_num, + BE_NO_LOOPBACK, 1); + if (ret) + return ret; + return *status; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 6f642426308c..c996dd76f546 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1254,7 +1254,7 @@ static bool be_send_pkt_to_bmc(struct be_adapter *adapter, if (is_udp_pkt((*skb))) { struct udphdr *udp = udp_hdr((*skb)); - switch (udp->dest) { + switch (ntohs(udp->dest)) { case DHCP_CLIENT_PORT: os2bmc = is_dhcp_client_filt_enabled(adapter); goto done; @@ -3529,15 +3529,15 @@ err: static int be_setup_wol(struct be_adapter *adapter, bool enable) { + struct device *dev = &adapter->pdev->dev; struct be_dma_mem cmd; - int status = 0; u8 mac[ETH_ALEN]; + int status; eth_zero_addr(mac); cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config); - cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, - GFP_KERNEL); + cmd.va = dma_zalloc_coherent(dev, cmd.size, &cmd.dma, GFP_KERNEL); if (!cmd.va) return -ENOMEM; @@ -3546,24 +3546,18 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable) PCICFG_PM_CONTROL_OFFSET, PCICFG_PM_CONTROL_MASK); if (status) { - dev_err(&adapter->pdev->dev, - "Could not enable Wake-on-lan\n"); - dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, - cmd.dma); - return status; + dev_err(dev, "Could not enable Wake-on-lan\n"); + goto err; } - status = be_cmd_enable_magic_wol(adapter, - adapter->netdev->dev_addr, - &cmd); - pci_enable_wake(adapter->pdev, PCI_D3hot, 1); - pci_enable_wake(adapter->pdev, PCI_D3cold, 1); } else { - status = be_cmd_enable_magic_wol(adapter, mac, &cmd); - pci_enable_wake(adapter->pdev, PCI_D3hot, 0); - pci_enable_wake(adapter->pdev, PCI_D3cold, 0); + ether_addr_copy(mac, adapter->netdev->dev_addr); } - dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); + status = be_cmd_enable_magic_wol(adapter, mac, &cmd); + pci_enable_wake(adapter->pdev, PCI_D3hot, enable); + pci_enable_wake(adapter->pdev, PCI_D3cold, enable); +err: + dma_free_coherent(dev, cmd.size, cmd.va, cmd.dma); return status; } @@ -4924,7 +4918,7 @@ static bool be_check_ufi_compatibility(struct be_adapter *adapter, { if (!fhdr) { dev_err(&adapter->pdev->dev, "Invalid FW UFI file"); - return -1; + return false; } /* First letter of the build version is used to identify @@ -5079,9 +5073,6 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int status = 0; u8 hsw_mode; - if (!sriov_enabled(adapter)) - return 0; - /* BE and Lancer chips support VEB mode only */ if (BEx_chip(adapter) || lancer_chip(adapter)) { hsw_mode = PORT_FWD_TYPE_VEB; @@ -5091,6 +5082,9 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, NULL); if (status) return 0; + + if (hsw_mode == PORT_FWD_TYPE_PASSTHRU) + return 0; } return ndo_dflt_bridge_getlink(skb, pid, seq, dev, @@ -5813,7 +5807,6 @@ static int be_pci_resume(struct pci_dev *pdev) if (status) return status; - pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); status = be_resume(adapter); @@ -5893,7 +5886,6 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev) return PCI_ERS_RESULT_DISCONNECT; pci_set_master(pdev); - pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); /* Check if card is ok and fw is ready */ diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index d49bee38cd31..cc2d8b4b18e3 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -965,7 +965,6 @@ static struct platform_driver hip04_mac_driver = { .remove = hip04_remove, .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, .of_match_table = hip04_mac_match, }, }; diff --git a/drivers/net/ethernet/hisilicon/hip04_mdio.c b/drivers/net/ethernet/hisilicon/hip04_mdio.c index b3bac25db99c..fca0a5be1f0f 100644 --- a/drivers/net/ethernet/hisilicon/hip04_mdio.c +++ b/drivers/net/ethernet/hisilicon/hip04_mdio.c @@ -174,7 +174,6 @@ static struct platform_driver hip04_mdio_driver = { .remove = hip04_mdio_remove, .driver = { .name = "hip04-mdio", - .owner = THIS_MODULE, .of_match_table = hip04_mdio_match, }, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 7a4f20bb7fcb..12c65e1ad6a9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -917,7 +917,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (dev->features & NETIF_F_RXHASH) skb_set_hash(gro_skb, be32_to_cpu(cqe->immed_rss_invalid), - PKT_HASH_TYPE_L3); + (ip_summed == CHECKSUM_UNNECESSARY) ? + PKT_HASH_TYPE_L4 : + PKT_HASH_TYPE_L3); skb_record_rx_queue(gro_skb, cq->ring); skb_mark_napi_id(gro_skb, &cq->napi); @@ -963,7 +965,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (dev->features & NETIF_F_RXHASH) skb_set_hash(skb, be32_to_cpu(cqe->immed_rss_invalid), - PKT_HASH_TYPE_L3); + (ip_summed == CHECKSUM_UNNECESSARY) ? + PKT_HASH_TYPE_L4 : + PKT_HASH_TYPE_L3); if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) && diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12fbfcb44d8a..d76f4257e305 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2907,6 +2907,8 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, { u64 dev_flags = dev->flags; int err = 0; + int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev), + MLX4_MAX_NUM_VF); if (reset_flow) { dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), @@ -2932,6 +2934,12 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, } if (!(dev->flags & MLX4_FLAG_SRIOV)) { + if (total_vfs > fw_enabled_sriov_vfs) { + mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR_IOV\n", + total_vfs, fw_enabled_sriov_vfs); + err = -ENOMEM; + goto disable_sriov; + } mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs); err = pci_enable_sriov(pdev, total_vfs); } @@ -3413,20 +3421,20 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, goto err_disable_pdev; } } - if (total_vfs >= MLX4_MAX_NUM_VF) { + if (total_vfs > MLX4_MAX_NUM_VF) { dev_err(&pdev->dev, - "Requested more VF's (%d) than allowed (%d)\n", - total_vfs, MLX4_MAX_NUM_VF - 1); + "Requested more VF's (%d) than allowed by hw (%d)\n", + total_vfs, MLX4_MAX_NUM_VF); err = -EINVAL; goto err_disable_pdev; } for (i = 0; i < MLX4_MAX_PORTS; i++) { - if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) { + if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) { dev_err(&pdev->dev, - "Requested more VF's (%d) for port (%d) than allowed (%d)\n", + "Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n", nvfs[i] + nvfs[2], i + 1, - MLX4_MAX_NUM_VF_P_PORT - 1); + MLX4_MAX_NUM_VF_P_PORT); err = -EINVAL; goto err_disable_pdev; } diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index c28111749e1f..2d1b94274079 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -8226,31 +8226,7 @@ static void s2io_rem_nic(struct pci_dev *pdev) pci_disable_device(pdev); } -/** - * s2io_starter - Entry point for the driver - * Description: This function is the entry point for the driver. It verifies - * the module loadable parameters and initializes PCI configuration space. - */ - -static int __init s2io_starter(void) -{ - return pci_register_driver(&s2io_driver); -} - -/** - * s2io_closer - Cleanup routine for the driver - * Description: This function is the cleanup routine for the driver. It - * unregisters the driver. - */ - -static __exit void s2io_closer(void) -{ - pci_unregister_driver(&s2io_driver); - DBG_PRINT(INIT_DBG, "cleanup done\n"); -} - -module_init(s2io_starter); -module_exit(s2io_closer); +module_pci_driver(s2io_driver); static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip, struct tcphdr **tcp, struct RxD_t *rxdp, diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index d89b6ed82c51..6c5997dc8afc 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1085,8 +1085,6 @@ static void s2io_txpic_intr_handle(struct s2io_nic *sp); static void tx_intr_handler(struct fifo_info *fifo_data); static void s2io_handle_errors(void * dev_id); -static int s2io_starter(void); -static void s2io_closer(void); static void s2io_tx_watchdog(struct net_device *dev); static void s2io_set_multicast(struct net_device *dev); static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp); diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 2d8578cade03..c0051673c9fa 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -1818,6 +1818,30 @@ rocker_cmd_set_port_settings_macaddr_prep(const struct rocker_port *rocker_port, } static int +rocker_cmd_set_port_settings_mtu_prep(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + int mtu = *(int *)priv; + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, + rocker_port->pport)) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MTU, + mtu)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + return 0; +} + +static int rocker_cmd_set_port_learning_prep(const struct rocker_port *rocker_port, struct rocker_desc_info *desc_info, void *priv) @@ -1874,6 +1898,14 @@ static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port, macaddr, NULL, NULL); } +static int rocker_cmd_set_port_settings_mtu(struct rocker_port *rocker_port, + int mtu) +{ + return rocker_cmd_exec(rocker_port, SWITCHDEV_TRANS_NONE, 0, + rocker_cmd_set_port_settings_mtu_prep, + &mtu, NULL, NULL); +} + static int rocker_port_set_learning(struct rocker_port *rocker_port, enum switchdev_trans trans) { @@ -4152,6 +4184,34 @@ static int rocker_port_set_mac_address(struct net_device *dev, void *p) return 0; } +static int rocker_port_change_mtu(struct net_device *dev, int new_mtu) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int running = netif_running(dev); + int err; + +#define ROCKER_PORT_MIN_MTU 68 +#define ROCKER_PORT_MAX_MTU 9000 + + if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU) + return -EINVAL; + + if (running) + rocker_port_stop(dev); + + netdev_info(dev, "MTU change from %d to %d\n", dev->mtu, new_mtu); + dev->mtu = new_mtu; + + err = rocker_cmd_set_port_settings_mtu(rocker_port, new_mtu); + if (err) + return err; + + if (running) + err = rocker_port_open(dev); + + return err; +} + static int rocker_port_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { @@ -4172,6 +4232,7 @@ static const struct net_device_ops rocker_port_netdev_ops = { .ndo_stop = rocker_port_stop, .ndo_start_xmit = rocker_port_xmit, .ndo_set_mac_address = rocker_port_set_mac_address, + .ndo_change_mtu = rocker_port_change_mtu, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index c61fbf968036..08b2c3d96188 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -159,6 +159,7 @@ enum { ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */ ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */ ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, /* binary */ + ROCKER_TLV_CMD_PORT_SETTINGS_MTU, /* u16 */ __ROCKER_TLV_CMD_PORT_SETTINGS_MAX, ROCKER_TLV_CMD_PORT_SETTINGS_MAX = diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 5ec4ed3f6c8d..3e47202b9010 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2142,7 +2142,6 @@ MODULE_DEVICE_TABLE(of, of_match); static struct platform_driver netcp_driver = { .driver = { .name = "netcp-1.0", - .owner = THIS_MODULE, .of_match_table = of_match, }, .probe = netcp_probe, diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index dd4544085db3..26cd14ccf4d5 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -589,6 +589,7 @@ struct nvsp_message { #define NETVSC_MTU 65536 +#define NETVSC_MTU_MIN 68 #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 358475ed9b59..b855ba9a507d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -743,8 +743,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) limit = NETVSC_MTU - ETH_HLEN; - /* Hyper-V hosts don't support MTU < ETH_DATA_LEN (1500) */ - if (mtu < ETH_DATA_LEN || mtu > limit) + if (mtu < NETVSC_MTU_MIN || mtu > limit) return -EINVAL; nvdev->start_remove = true; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 236aeb76ef22..2e40417a8087 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1054,7 +1054,7 @@ int rndis_filter_device_add(struct hv_device *dev, ret = rndis_filter_query_device(rndis_device, RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE, &mtu, &size); - if (ret == 0 && size == sizeof(u32)) + if (ret == 0 && size == sizeof(u32) && mtu < net_device->ndev->mtu) net_device->ndev->mtu = mtu; /* Get the mac address */ diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 94570aace241..cc56fac3c3f8 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -38,69 +38,68 @@ #include <net/net_namespace.h> #define TX_Q_LIMIT 32 -struct ifb_private { +struct ifb_q_private { + struct net_device *dev; struct tasklet_struct ifb_tasklet; - int tasklet_pending; - - struct u64_stats_sync rsync; + int tasklet_pending; + int txqnum; struct sk_buff_head rq; - u64 rx_packets; - u64 rx_bytes; + u64 rx_packets; + u64 rx_bytes; + struct u64_stats_sync rsync; struct u64_stats_sync tsync; + u64 tx_packets; + u64 tx_bytes; struct sk_buff_head tq; - u64 tx_packets; - u64 tx_bytes; -}; +} ____cacheline_aligned_in_smp; -static int numifbs = 2; +struct ifb_dev_private { + struct ifb_q_private *tx_private; +}; -static void ri_tasklet(unsigned long dev); static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev); static int ifb_open(struct net_device *dev); static int ifb_close(struct net_device *dev); -static void ri_tasklet(unsigned long dev) +static void ifb_ri_tasklet(unsigned long _txp) { - struct net_device *_dev = (struct net_device *)dev; - struct ifb_private *dp = netdev_priv(_dev); + struct ifb_q_private *txp = (struct ifb_q_private *)_txp; struct netdev_queue *txq; struct sk_buff *skb; - txq = netdev_get_tx_queue(_dev, 0); - if ((skb = skb_peek(&dp->tq)) == NULL) { - if (__netif_tx_trylock(txq)) { - skb_queue_splice_tail_init(&dp->rq, &dp->tq); - __netif_tx_unlock(txq); - } else { - /* reschedule */ + txq = netdev_get_tx_queue(txp->dev, txp->txqnum); + skb = skb_peek(&txp->tq); + if (!skb) { + if (!__netif_tx_trylock(txq)) goto resched; - } + skb_queue_splice_tail_init(&txp->rq, &txp->tq); + __netif_tx_unlock(txq); } - while ((skb = __skb_dequeue(&dp->tq)) != NULL) { + while ((skb = __skb_dequeue(&txp->tq)) != NULL) { u32 from = G_TC_FROM(skb->tc_verd); skb->tc_verd = 0; skb->tc_verd = SET_TC_NCLS(skb->tc_verd); - u64_stats_update_begin(&dp->tsync); - dp->tx_packets++; - dp->tx_bytes += skb->len; - u64_stats_update_end(&dp->tsync); + u64_stats_update_begin(&txp->tsync); + txp->tx_packets++; + txp->tx_bytes += skb->len; + u64_stats_update_end(&txp->tsync); rcu_read_lock(); - skb->dev = dev_get_by_index_rcu(dev_net(_dev), skb->skb_iif); + skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif); if (!skb->dev) { rcu_read_unlock(); dev_kfree_skb(skb); - _dev->stats.tx_dropped++; - if (skb_queue_len(&dp->tq) != 0) + txp->dev->stats.tx_dropped++; + if (skb_queue_len(&txp->tq) != 0) goto resched; break; } rcu_read_unlock(); - skb->skb_iif = _dev->ifindex; + skb->skb_iif = txp->dev->ifindex; if (from & AT_EGRESS) { dev_queue_xmit(skb); @@ -112,10 +111,11 @@ static void ri_tasklet(unsigned long dev) } if (__netif_tx_trylock(txq)) { - if ((skb = skb_peek(&dp->rq)) == NULL) { - dp->tasklet_pending = 0; - if (netif_queue_stopped(_dev)) - netif_wake_queue(_dev); + skb = skb_peek(&txp->rq); + if (!skb) { + txp->tasklet_pending = 0; + if (netif_tx_queue_stopped(txq)) + netif_tx_wake_queue(txq); } else { __netif_tx_unlock(txq); goto resched; @@ -123,8 +123,8 @@ static void ri_tasklet(unsigned long dev) __netif_tx_unlock(txq); } else { resched: - dp->tasklet_pending = 1; - tasklet_schedule(&dp->ifb_tasklet); + txp->tasklet_pending = 1; + tasklet_schedule(&txp->ifb_tasklet); } } @@ -132,29 +132,58 @@ resched: static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - struct ifb_private *dp = netdev_priv(dev); + struct ifb_dev_private *dp = netdev_priv(dev); + struct ifb_q_private *txp = dp->tx_private; unsigned int start; - - do { - start = u64_stats_fetch_begin_irq(&dp->rsync); - stats->rx_packets = dp->rx_packets; - stats->rx_bytes = dp->rx_bytes; - } while (u64_stats_fetch_retry_irq(&dp->rsync, start)); - - do { - start = u64_stats_fetch_begin_irq(&dp->tsync); - - stats->tx_packets = dp->tx_packets; - stats->tx_bytes = dp->tx_bytes; - - } while (u64_stats_fetch_retry_irq(&dp->tsync, start)); - + u64 packets, bytes; + int i; + + for (i = 0; i < dev->num_tx_queues; i++,txp++) { + do { + start = u64_stats_fetch_begin_irq(&txp->rsync); + packets = txp->rx_packets; + bytes = txp->rx_bytes; + } while (u64_stats_fetch_retry_irq(&txp->rsync, start)); + stats->rx_packets += packets; + stats->rx_bytes += bytes; + + do { + start = u64_stats_fetch_begin_irq(&txp->tsync); + packets = txp->tx_packets; + bytes = txp->tx_bytes; + } while (u64_stats_fetch_retry_irq(&txp->tsync, start)); + stats->tx_packets += packets; + stats->tx_bytes += bytes; + } stats->rx_dropped = dev->stats.rx_dropped; stats->tx_dropped = dev->stats.tx_dropped; return stats; } +static int ifb_dev_init(struct net_device *dev) +{ + struct ifb_dev_private *dp = netdev_priv(dev); + struct ifb_q_private *txp; + int i; + + txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL); + if (!txp) + return -ENOMEM; + dp->tx_private = txp; + for (i = 0; i < dev->num_tx_queues; i++,txp++) { + txp->txqnum = i; + txp->dev = dev; + __skb_queue_head_init(&txp->rq); + __skb_queue_head_init(&txp->tq); + u64_stats_init(&txp->rsync); + u64_stats_init(&txp->tsync); + tasklet_init(&txp->ifb_tasklet, ifb_ri_tasklet, + (unsigned long)txp); + netif_tx_start_queue(netdev_get_tx_queue(dev, i)); + } + return 0; +} static const struct net_device_ops ifb_netdev_ops = { .ndo_open = ifb_open, @@ -162,6 +191,7 @@ static const struct net_device_ops ifb_netdev_ops = { .ndo_get_stats64 = ifb_stats64, .ndo_start_xmit = ifb_xmit, .ndo_validate_addr = eth_validate_addr, + .ndo_init = ifb_dev_init, }; #define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \ @@ -169,10 +199,24 @@ static const struct net_device_ops ifb_netdev_ops = { NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | \ NETIF_F_HW_VLAN_STAG_TX) +static void ifb_dev_free(struct net_device *dev) +{ + struct ifb_dev_private *dp = netdev_priv(dev); + struct ifb_q_private *txp = dp->tx_private; + int i; + + for (i = 0; i < dev->num_tx_queues; i++,txp++) { + tasklet_kill(&txp->ifb_tasklet); + __skb_queue_purge(&txp->rq); + __skb_queue_purge(&txp->tq); + } + kfree(dp->tx_private); + free_netdev(dev); +} + static void ifb_setup(struct net_device *dev) { /* Initialize the device structure. */ - dev->destructor = free_netdev; dev->netdev_ops = &ifb_netdev_ops; /* Fill in device structure with ethernet-generic values. */ @@ -188,17 +232,19 @@ static void ifb_setup(struct net_device *dev) dev->priv_flags &= ~IFF_TX_SKB_SHARING; netif_keep_dst(dev); eth_hw_addr_random(dev); + dev->destructor = ifb_dev_free; } static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ifb_private *dp = netdev_priv(dev); + struct ifb_dev_private *dp = netdev_priv(dev); u32 from = G_TC_FROM(skb->tc_verd); + struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb); - u64_stats_update_begin(&dp->rsync); - dp->rx_packets++; - dp->rx_bytes += skb->len; - u64_stats_update_end(&dp->rsync); + u64_stats_update_begin(&txp->rsync); + txp->rx_packets++; + txp->rx_bytes += skb->len; + u64_stats_update_end(&txp->rsync); if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) { dev_kfree_skb(skb); @@ -206,14 +252,13 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - if (skb_queue_len(&dp->rq) >= dev->tx_queue_len) { - netif_stop_queue(dev); - } + if (skb_queue_len(&txp->rq) >= dev->tx_queue_len) + netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum)); - __skb_queue_tail(&dp->rq, skb); - if (!dp->tasklet_pending) { - dp->tasklet_pending = 1; - tasklet_schedule(&dp->ifb_tasklet); + __skb_queue_tail(&txp->rq, skb); + if (!txp->tasklet_pending) { + txp->tasklet_pending = 1; + tasklet_schedule(&txp->ifb_tasklet); } return NETDEV_TX_OK; @@ -221,24 +266,13 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) static int ifb_close(struct net_device *dev) { - struct ifb_private *dp = netdev_priv(dev); - - tasklet_kill(&dp->ifb_tasklet); - netif_stop_queue(dev); - __skb_queue_purge(&dp->rq); - __skb_queue_purge(&dp->tq); + netif_tx_stop_all_queues(dev); return 0; } static int ifb_open(struct net_device *dev) { - struct ifb_private *dp = netdev_priv(dev); - - tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev); - __skb_queue_head_init(&dp->rq); - __skb_queue_head_init(&dp->tq); - netif_start_queue(dev); - + netif_tx_start_all_queues(dev); return 0; } @@ -255,31 +289,30 @@ static int ifb_validate(struct nlattr *tb[], struct nlattr *data[]) static struct rtnl_link_ops ifb_link_ops __read_mostly = { .kind = "ifb", - .priv_size = sizeof(struct ifb_private), + .priv_size = sizeof(struct ifb_dev_private), .setup = ifb_setup, .validate = ifb_validate, }; -/* Number of ifb devices to be set up by this module. */ +/* Number of ifb devices to be set up by this module. + * Note that these legacy devices have one queue. + * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb + */ +static int numifbs = 2; module_param(numifbs, int, 0); MODULE_PARM_DESC(numifbs, "Number of ifb devices"); static int __init ifb_init_one(int index) { struct net_device *dev_ifb; - struct ifb_private *dp; int err; - dev_ifb = alloc_netdev(sizeof(struct ifb_private), "ifb%d", + dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d", NET_NAME_UNKNOWN, ifb_setup); if (!dev_ifb) return -ENOMEM; - dp = netdev_priv(dev_ifb); - u64_stats_init(&dp->rsync); - u64_stats_init(&dp->tsync); - dev_ifb->rtnl_link_ops = &ifb_link_ops; err = register_netdevice(dev_ifb); if (err < 0) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index f721444c2b0a..3320a179ee36 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -48,6 +48,8 @@ #define MII_M1011_IMASK_CLEAR 0x0000 #define MII_M1011_PHY_SCR 0x10 +#define MII_M1011_PHY_SCR_MDI 0x0000 +#define MII_M1011_PHY_SCR_MDI_X 0x0020 #define MII_M1011_PHY_SCR_AUTO_CROSS 0x0060 #define MII_M1145_PHY_EXT_SR 0x1b @@ -159,6 +161,43 @@ static int marvell_config_intr(struct phy_device *phydev) return err; } +static int marvell_set_polarity(struct phy_device *phydev, int polarity) +{ + int reg; + int err; + int val; + + /* get the current settings */ + reg = phy_read(phydev, MII_M1011_PHY_SCR); + if (reg < 0) + return reg; + + val = reg; + val &= ~MII_M1011_PHY_SCR_AUTO_CROSS; + switch (polarity) { + case ETH_TP_MDI: + val |= MII_M1011_PHY_SCR_MDI; + break; + case ETH_TP_MDI_X: + val |= MII_M1011_PHY_SCR_MDI_X; + break; + case ETH_TP_MDI_AUTO: + case ETH_TP_MDI_INVALID: + default: + val |= MII_M1011_PHY_SCR_AUTO_CROSS; + break; + } + + if (val != reg) { + /* Set the new polarity value in the register */ + err = phy_write(phydev, MII_M1011_PHY_SCR, val); + if (err) + return err; + } + + return 0; +} + static int marvell_config_aneg(struct phy_device *phydev) { int err; @@ -191,8 +230,7 @@ static int marvell_config_aneg(struct phy_device *phydev) if (err < 0) return err; - err = phy_write(phydev, MII_M1011_PHY_SCR, - MII_M1011_PHY_SCR_AUTO_CROSS); + err = marvell_set_polarity(phydev, phydev->mdix); if (err < 0) return err; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index b2197b506acb..84b1fba58ac3 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -353,6 +353,8 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) phydev->duplex = cmd->duplex; + phydev->mdix = cmd->eth_tp_mdix_ctrl; + /* Restart the PHY */ phy_start_aneg(phydev); @@ -377,6 +379,7 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) cmd->transceiver = phy_is_internal(phydev) ? XCVR_INTERNAL : XCVR_EXTERNAL; cmd->autoneg = phydev->autoneg; + cmd->eth_tp_mdix_ctrl = phydev->mdix; return 0; } diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 8a495b318b6f..c6cb85a85c89 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -325,9 +325,6 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) queue->pending_prod + queue->pending_cons; } -/* Callback from stack when TX packet can be released */ -void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); - irqreturn_t xenvif_interrupt(int irq, void *dev_id); extern bool separate_tx_rx_irq; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 82806c60aa42..1319a6bb6b82 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -94,7 +94,6 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) struct inet6_skb_parm { int iif; __be16 ra; - __u16 hop; __u16 dst0; __u16 srcrt; __u16 dst1; @@ -111,6 +110,7 @@ struct inet6_skb_parm { #define IP6SKB_REROUTED 4 #define IP6SKB_ROUTERALERT 8 #define IP6SKB_FRAGMENTED 16 +#define IP6SKB_HOPBYHOP 32 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/linux/phy.h b/include/linux/phy.h index a26c3f84b8dd..e5fb1d415961 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -424,6 +424,8 @@ struct phy_device { struct net_device *attached_dev; + u8 mdix; + void (*adjust_link)(struct net_device *dev); }; #define to_phy_device(d) container_of(d, struct phy_device, dev) diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1b..8d2a707a9e87 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -21,6 +21,8 @@ struct tcf_common { struct gnet_stats_rate_est64 tcfc_rate_est; spinlock_t tcfc_lock; struct rcu_head tcfc_rcu; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; }; #define tcf_head common.tcfc_head #define tcf_index common.tcfc_index @@ -68,6 +70,17 @@ static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf) kfree(hf->htab); } +/* Update lastuse only if needed, to avoid dirtying a cache line. + * We use a temp variable to avoid fetching jiffies twice. + */ +static inline void tcf_lastuse_update(struct tcf_t *tm) +{ + unsigned long now = jiffies; + + if (tm->lastuse != now) + tm->lastuse = now; +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -103,7 +116,7 @@ int tcf_hash_release(struct tc_action *a, int bind); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, - int size, int bind); + int size, int bind, bool cpustats); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b73c88a19dd4..b07d126694a7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk); void inet_hashinfo_init(struct inet_hashinfo *h); -int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw); -int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw); +void __inet_hash_nolisten(struct sock *sk, struct sock *osk); +void __inet_hash(struct sock *sk, struct sock *osk); void inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 360c4802288d..879d6e5a973b 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -100,10 +100,8 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) void inet_twsk_free(struct inet_timewait_sock *tw); void inet_twsk_put(struct inet_timewait_sock *tw); -int inet_twsk_unhash(struct inet_timewait_sock *tw); - -int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, - struct inet_hashinfo *hashinfo); +void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, @@ -113,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); -void inet_twsk_deschedule(struct inet_timewait_sock *tw); +void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 8d93544a2d2b..c0368db6df54 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -31,6 +31,7 @@ struct netns_sysctl_ipv6 { int auto_flowlabels; int icmpv6_time; int anycast_src_echo_reply; + int ip_nonlocal_bind; int fwmark_reflect; int idgen_retries; int idgen_delay; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2738f6f87908..2eab08c38e32 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -513,17 +513,20 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats, bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; } -static inline void qdisc_bstats_update_cpu(struct Qdisc *sch, - const struct sk_buff *skb) +static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, + const struct sk_buff *skb) { - struct gnet_stats_basic_cpu *bstats = - this_cpu_ptr(sch->cpu_bstats); - u64_stats_update_begin(&bstats->syncp); bstats_update(&bstats->bstats, skb); u64_stats_update_end(&bstats->syncp); } +static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, + const struct sk_buff *skb) +{ + bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); +} + static inline void qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) { @@ -547,16 +550,24 @@ static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) sch->qstats.drops += count; } -static inline void qdisc_qstats_drop(struct Qdisc *sch) +static inline void qstats_drop_inc(struct gnet_stats_queue *qstats) { - sch->qstats.drops++; + qstats->drops++; } -static inline void qdisc_qstats_drop_cpu(struct Qdisc *sch) +static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats) { - struct gnet_stats_queue *qstats = this_cpu_ptr(sch->cpu_qstats); + qstats->overlimits++; +} - qstats->drops++; +static inline void qdisc_qstats_drop(struct Qdisc *sch) +{ + qstats_drop_inc(&sch->qstats); +} + +static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) +{ + qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats)); } static inline void qdisc_qstats_overlimit(struct Qdisc *sch) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 9fc9b578908a..592a6bc02b0b 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -6,9 +6,10 @@ struct tcf_gact { struct tcf_common common; #ifdef CONFIG_GACT_PROB - u16 tcfg_ptype; - u16 tcfg_pval; - int tcfg_paction; + u16 tcfg_ptype; + u16 tcfg_pval; + int tcfg_paction; + atomic_t packets; #endif }; #define to_gact(a) \ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 4dd77a1c106b..dae96bae1c19 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,7 +8,7 @@ struct tcf_mirred { int tcfm_eaction; int tcfm_ifindex; int tcfm_ok_push; - struct net_device *tcfm_dev; + struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; #define to_mirred(a) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 950cfecaad3c..364426a2be5a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -989,6 +989,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) #define TCP_INFINITE_SSTHRESH 0x7fffffff +static inline bool tcp_in_slow_start(const struct tcp_sock *tp) +{ + return tp->snd_cwnd < tp->snd_ssthresh; +} + static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) { return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; @@ -1065,7 +1070,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); /* If in slow start, ensure cwnd grows to twice what was ACKed. */ - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) return tp->snd_cwnd < 2 * tp->max_packets_out; return tp->is_cwnd_limited; diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 68f0ecad6c6e..1a47946f95ba 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -33,9 +33,6 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) static inline void twsk_destructor(struct sock *sk) { - BUG_ON(sk == NULL); - BUG_ON(sk->sk_prot == NULL); - BUG_ON(sk->sk_prot->twsk_prot == NULL); if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) sk->sk_prot->twsk_prot->twsk_destructor(sk); } diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index eaaea6208b42..3635b7797508 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -182,6 +182,7 @@ struct br_mdb_entry { #define MDB_TEMPORARY 0 #define MDB_PERMANENT 1 __u8 state; + __u16 vid; struct { union { __be32 ip4; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c5bedc82bc1c..bf38f5e8196c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -453,7 +453,11 @@ select_insn: if (unlikely(!prog)) goto out; - ARG1 = BPF_R1; + /* ARG1 at this point is guaranteed to point to CTX from + * the verifier side due to the fact that the tail call is + * handeled like a helper, that is, bpf_tail_call_proto, + * where arg1_type is ARG_PTR_TO_CTX. + */ insn = prog->insnsi; goto select_insn; out: diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 7f58c735d745..9198f28a5528 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -3674,6 +3674,9 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_LD_IMM64(R0, 0x0123456789abcdefLL), BPF_ENDIAN(BPF_FROM_BE, R0, 32), + BPF_ALU64_REG(BPF_MOV, R1, R0), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */ BPF_EXIT_INSN(), }, INTERNAL, @@ -3708,6 +3711,9 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_LD_IMM64(R0, 0x0123456789abcdefLL), BPF_ENDIAN(BPF_FROM_LE, R0, 32), + BPF_ALU64_REG(BPF_MOV, R1, R0), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */ BPF_EXIT_INSN(), }, INTERNAL, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index c11cf2611db0..9f7cdd27b762 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -85,6 +85,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, memset(&e, 0, sizeof(e)); e.ifindex = port->dev->ifindex; e.state = p->state; + e.vid = p->addr.vid; if (p->addr.proto == htons(ETH_P_IP)) e.addr.u.ip4 = p->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) @@ -230,7 +231,7 @@ errout: } void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, - struct br_ip *group, int type) + struct br_ip *group, int type, u8 state) { struct br_mdb_entry entry; @@ -241,6 +242,8 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, #if IS_ENABLED(CONFIG_IPV6) entry.addr.u.ip6 = group->u.ip6; #endif + entry.state = state; + entry.vid = group->vid; __br_mdb_notify(dev, &entry, type); } @@ -263,6 +266,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry) return false; if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) return false; + if (entry->vid >= VLAN_VID_MASK) + return false; return true; } @@ -351,7 +356,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (state == MDB_TEMPORARY) mod_timer(&p->timer, now + br->multicast_membership_interval); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB, state); return 0; } @@ -375,6 +380,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, return -EINVAL; memset(&ip, 0, sizeof(ip)); + ip.vid = entry->vid; ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -422,6 +428,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) return -EINVAL; memset(&ip, 0, sizeof(ip)); + ip.vid = entry->vid; ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 742a6c27d7a2..5a44cd9473f2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -694,7 +694,7 @@ static int br_multicast_add_group(struct net_bridge *br, if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB, MDB_TEMPORARY); found: mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -1439,8 +1439,9 @@ br_multicast_leave_group(struct net_bridge *br, rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); + br_mdb_notify(br->dev, port, group, RTM_DELMDB, + p->state); call_rcu_bh(&p->rcu, br_multicast_free_pg); - br_mdb_notify(br->dev, port, group, RTM_DELMDB); if (!mp->ports && !mp->mglist && netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8b21146b24a0..c73fd785654d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -488,7 +488,7 @@ br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, void br_mdb_init(void); void br_mdb_uninit(void); void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, - struct br_ip *group, int type); + struct br_ip *group, int type, u8 state); #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) diff --git a/net/core/dev.c b/net/core/dev.c index a8e4dd430285..69445a33ace6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3645,7 +3645,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, qdisc_skb_cb(skb)->pkt_len = skb->len; skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - qdisc_bstats_update_cpu(cl->q, skb); + qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res)) { case TC_ACT_OK: @@ -3653,7 +3653,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_drop_cpu(cl->q); + qdisc_qstats_cpu_drop(cl->q); case TC_ACT_STOLEN: case TC_ACT_QUEUED: kfree_skb(skb); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 43d3dd62fcc8..42689d5c468c 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -60,11 +60,15 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) struct phy_device *phydev; unsigned int type; + if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv) + return false; + if (skb_headroom(skb) < ETH_HLEN) return false; + __skb_push(skb, ETH_HLEN); - type = classify(skb); + type = ptp_classify_raw(skb); __skb_pull(skb, ETH_HLEN); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 5f9b063bbe8a..f8b3701a6c3c 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -343,7 +343,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; - int twrefcnt = 0; spin_lock(lock); @@ -371,21 +370,17 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { - twrefcnt = inet_twsk_unhash(tw); + sk_nulls_del_node_init_rcu((struct sock *)tw); NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); - if (twrefcnt) - inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw); - - inet_twsk_put(tw); + inet_twsk_deschedule_put(tw); } return 0; @@ -403,13 +398,12 @@ static u32 inet_sk_port_offset(const struct sock *sk) inet->inet_dport); } -int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) +void __inet_hash_nolisten(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; struct inet_ehash_bucket *head; spinlock_t *lock; - int twrefcnt = 0; WARN_ON(!sk_unhashed(sk)); @@ -420,23 +414,22 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); - if (tw) { - WARN_ON(sk->sk_hash != tw->tw_hash); - twrefcnt = inet_twsk_unhash(tw); + if (osk) { + WARN_ON(sk->sk_hash != osk->sk_hash); + sk_nulls_del_node_init_rcu(osk); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - return twrefcnt; } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); -int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw) +void __inet_hash(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) - return __inet_hash_nolisten(sk, tw); + return __inet_hash_nolisten(sk, osk); WARN_ON(!sk_unhashed(sk)); ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; @@ -445,7 +438,6 @@ int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw) __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); spin_unlock(&ilb->lock); - return 0; } EXPORT_SYMBOL(__inet_hash); @@ -492,7 +484,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_bind_bucket *tb; int ret; struct net *net = sock_net(sk); - int twrefcnt = 1; if (!snum) { int i, remaining, low, high, port; @@ -560,19 +551,14 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - twrefcnt += __inet_hash_nolisten(sk, tw); + __inet_hash_nolisten(sk, (struct sock *)tw); } if (tw) - twrefcnt += inet_twsk_bind_unhash(tw, hinfo); + inet_twsk_bind_unhash(tw, hinfo); spin_unlock(&head->lock); - if (tw) { - inet_twsk_deschedule(tw); - while (twrefcnt) { - twrefcnt--; - inet_twsk_put(tw); - } - } + if (tw) + inet_twsk_deschedule_put(tw); ret = 0; goto out; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2ffbd16b79e0..ae22cc24fbe8 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -18,28 +18,6 @@ /** - * inet_twsk_unhash - unhash a timewait socket from established hash - * @tw: timewait socket - * - * unhash a timewait socket from established hash, if hashed. - * ehash lock must be held by caller. - * Returns 1 if caller should call inet_twsk_put() after lock release. - */ -int inet_twsk_unhash(struct inet_timewait_sock *tw) -{ - if (hlist_nulls_unhashed(&tw->tw_node)) - return 0; - - hlist_nulls_del_rcu(&tw->tw_node); - sk_nulls_node_init(&tw->tw_node); - /* - * We cannot call inet_twsk_put() ourself under lock, - * caller must call it for us. - */ - return 1; -} - -/** * inet_twsk_bind_unhash - unhash a timewait socket from bind hash * @tw: timewait socket * @hashinfo: hashinfo pointer @@ -48,35 +26,29 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw) * bind hash lock must be held by caller. * Returns 1 if caller should call inet_twsk_put() after lock release. */ -int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, +void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind_bucket *tb = tw->tw_tb; if (!tb) - return 0; + return; __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); - /* - * We cannot call inet_twsk_put() ourself under lock, - * caller must call it for us. - */ - return 1; + __sock_put((struct sock *)tw); } /* Must be called with locally disabled BHs. */ static void inet_twsk_kill(struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; - struct inet_bind_hashbucket *bhead; - int refcnt; - /* Unlink from established hashes. */ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); + struct inet_bind_hashbucket *bhead; spin_lock(lock); - refcnt = inet_twsk_unhash(tw); + sk_nulls_del_node_init_rcu((struct sock *)tw); spin_unlock(lock); /* Disassociate with bind bucket. */ @@ -84,11 +56,9 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) hashinfo->bhash_size)]; spin_lock(&bhead->lock); - refcnt += inet_twsk_bind_unhash(tw, hashinfo); + inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead->lock); - BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt)); - atomic_sub(refcnt, &tw->tw_refcnt); atomic_dec(&tw->tw_dr->tw_count); inet_twsk_put(tw); } @@ -235,13 +205,17 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc); * tcp_input.c to verify this. */ -/* This is for handling early-kills of TIME_WAIT sockets. */ -void inet_twsk_deschedule(struct inet_timewait_sock *tw) +/* This is for handling early-kills of TIME_WAIT sockets. + * Warning : consume reference. + * Caller should not access tw anymore. + */ +void inet_twsk_deschedule_put(struct inet_timewait_sock *tw) { if (del_timer_sync(&tw->tw_timer)) inet_twsk_kill(tw); + inet_twsk_put(tw); } -EXPORT_SYMBOL(inet_twsk_deschedule); +EXPORT_SYMBOL(inet_twsk_deschedule_put); void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) { @@ -311,9 +285,8 @@ restart: rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule(tw); + inet_twsk_deschedule_put(tw); local_bh_enable(); - inet_twsk_put(tw); goto restart_rcu; } /* If the nulls value we got at the end of this lookup is diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a50dc6d408d1..4d3fffafbe24 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -522,7 +522,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, int len; int ihlen; int err; - int sum_truesize; u8 ecn; ipq_kill(qp); @@ -590,32 +589,19 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, add_frag_mem_limit(&qp->q, clone->truesize); } + skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - sum_truesize = head->truesize; - for (fp = head->next; fp;) { - bool headstolen; - int delta; - struct sk_buff *next = fp->next; - - sum_truesize += fp->truesize; + for (fp=head->next; fp; fp = fp->next) { + head->data_len += fp->len; + head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); - - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { - kfree_skb_partial(fp, headstolen); - } else { - if (!skb_shinfo(head)->frag_list) - skb_shinfo(head)->frag_list = fp; - head->data_len += fp->len; - head->len += fp->len; - head->truesize += fp->truesize; - } - fp = next; + head->truesize += fp->truesize; } - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(&qp->q, head->truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 05ff44b758df..e89094ab5ddb 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -363,7 +363,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, scoped); rcu_read_unlock(); - if (!(isk->freebind || isk->transparent || has_addr || + if (!(net->ipv6.sysctl.ip_nonlocal_bind || + isk->freebind || isk->transparent || has_addr || addr_type == IPV6_ADDR_ANY)) return -EADDRNOTAVAIL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d0362a2de3d3..04c83de4f79e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1546,7 +1546,6 @@ static int __mkroute_input(struct sk_buff *skb, struct rtable *rth; int err; struct in_device *out_dev; - unsigned int flags = 0; bool do_cache; u32 itag = 0; @@ -1610,7 +1609,7 @@ static int __mkroute_input(struct sk_buff *skb, } rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev)); - rth->rt_flags = flags; + rth->rt_flags = 0; rth->rt_type = res->type; rth->rt_is_input = 1; rth->rt_iif = 0; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index c037644eafb7..fd1405d37c14 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -146,7 +146,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { bictcp_update(ca, tp->snd_cwnd); diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 8c6fd3d5e40f..167b6a3e1b98 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -264,7 +264,7 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked) u32 prior_snd_cwnd; u32 incr; - if (tp->snd_cwnd < tp->snd_ssthresh && hystart_detect) + if (tcp_in_slow_start(tp) && hystart_detect) tcp_cdg_hystart_update(sk); if (after(ack, ca->rtt_seq) && ca->rtt.v64) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 84be008c945c..a2ed23c595cf 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -365,10 +365,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) */ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { - u32 cwnd = tp->snd_cwnd + acked; + u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); - if (cwnd > tp->snd_ssthresh) - cwnd = tp->snd_ssthresh + 1; acked -= cwnd - tp->snd_cwnd; tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); @@ -413,7 +411,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; /* In "safe" area, increase. */ - if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 06d3d665a9fd..28011fb1f4a2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -320,7 +320,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (tcp_in_slow_start(tp)) { if (hystart && after(ack, ca->end_seq)) bictcp_hystart_reset(sk); acked = tcp_slow_start(tp, acked); @@ -439,7 +439,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) ca->delay_min = delay; /* hystart triggers when cwnd is larger than some threshold */ - if (hystart && tp->snd_cwnd <= tp->snd_ssthresh && + if (hystart && tcp_in_slow_start(tp) && tp->snd_cwnd >= hystart_low_window) hystart_update(sk, delay); } diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 882c08aae2f5..db7842495a64 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -116,7 +116,7 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { /* Update AIMD parameters. diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 58469fff6c18..82f0d9ed60f5 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -236,7 +236,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { /* In dangerous area, increase slowly. diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index f963b274f2b0..083831e359df 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -112,7 +112,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) rho_fractions = ca->rho_3ls - (ca->rho << 3); - if (tp->snd_cwnd < tp->snd_ssthresh) { + if (tcp_in_slow_start(tp)) { /* * slow start * INC = 2^RHO - 1 diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index f71002e4db0b..2ab9bbb6faff 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -268,7 +268,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; /* In slow start */ - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 684f095d196e..1578fc2a6f39 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -109,6 +109,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ +#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ @@ -196,11 +197,13 @@ static void tcp_enter_quickack_mode(struct sock *sk) * and the session is not interactive. */ -static inline bool tcp_in_quickack_mode(const struct sock *sk) +static bool tcp_in_quickack_mode(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); + const struct dst_entry *dst = __sk_dst_get(sk); - return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; + return (dst && dst_metric(dst, RTAX_QUICKACK)) || + (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong); } static void tcp_ecn_queue_cwr(struct tcp_sock *tp) @@ -1037,7 +1040,7 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, * highest SACK block). Also calculate the lowest snd_nxt among the remaining * retransmitted skbs to avoid some costly processing per ACKs. */ -static void tcp_mark_lost_retrans(struct sock *sk) +static void tcp_mark_lost_retrans(struct sock *sk, int *flag) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -1078,7 +1081,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) if (after(received_upto, ack_seq)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - + *flag |= FLAG_LOST_RETRANS; tcp_skb_mark_lost_uncond_verify(tp, skb); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); } else { @@ -1818,7 +1821,7 @@ advance_sp: ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); - tcp_mark_lost_retrans(sk); + tcp_mark_lost_retrans(sk, &state->flag); tcp_verify_left_out(tp); out: @@ -2475,15 +2478,14 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) return false; } -/* The cwnd reduction in CWR and Recovery use the PRR algorithm - * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/ +/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937. * It computes the number of packets to send (sndcnt) based on packets newly * delivered: * 1) If the packets in flight is larger than ssthresh, PRR spreads the * cwnd reductions across a full RTT. - * 2) If packets in flight is lower than ssthresh (such as due to excess - * losses and/or application stalls), do not perform any further cwnd - * reductions, but instead slow start up to ssthresh. + * 2) Otherwise PRR uses packet conservation to send as much as delivered. + * But when the retransmits are acked without further losses, PRR + * slow starts cwnd up to ssthresh to speed up the recovery. */ static void tcp_init_cwnd_reduction(struct sock *sk) { @@ -2500,7 +2502,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) } static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, - int fast_rexmit) + int fast_rexmit, int flag) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; @@ -2509,16 +2511,18 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, (tp->packets_out - tp->sacked_out); tp->prr_delivered += newly_acked_sacked; - if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + if (delta < 0) { u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + tp->prior_cwnd - 1; sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; - } else { + } else if ((flag & FLAG_RETRANS_DATA_ACKED) && + !(flag & FLAG_LOST_RETRANS)) { sndcnt = min_t(int, delta, max_t(int, tp->prr_delivered - tp->prr_out, newly_acked_sacked) + 1); + } else { + sndcnt = min(delta, newly_acked_sacked); } - sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; } @@ -2579,7 +2583,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); } else { - tcp_cwnd_reduction(sk, prior_unsacked, 0); + tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); } } @@ -2676,7 +2680,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) tp->prior_ssthresh = 0; tcp_init_undo(tp); - if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + if (!tcp_in_cwnd_reduction(sk)) { if (!ece_ack) tp->prior_ssthresh = tcp_current_ssthresh(sk); tcp_init_cwnd_reduction(sk); @@ -2736,7 +2740,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) /* Undo during fast recovery after partial ACK. */ static bool tcp_try_undo_partial(struct sock *sk, const int acked, - const int prior_unsacked) + const int prior_unsacked, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -2752,7 +2756,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked, * mark more packets lost or retransmit more. */ if (tp->retrans_out) { - tcp_cwnd_reduction(sk, prior_unsacked, 0); + tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); return true; } @@ -2839,7 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { - if (tcp_try_undo_partial(sk, acked, prior_unsacked)) + if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag)) return; /* Partial ACK arrived. Force fast retransmit. */ do_lost = tcp_is_reno(tp) || @@ -2852,9 +2856,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); - if (icsk->icsk_ca_state != TCP_CA_Open) + if (icsk->icsk_ca_state != TCP_CA_Open && + !(flag & FLAG_LOST_RETRANS)) return; - /* Fall through to processing in Open state. */ + /* Change state if cwnd is undone or retransmits are lost */ default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -2889,7 +2894,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit); + tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag); tcp_xmit_retransmit_queue(sk); } @@ -3563,10 +3568,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) &sack_state); acked -= tp->packets_out; - /* Advance cwnd if state allows */ - if (tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, acked); - if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, acked, prior_unsacked, @@ -3575,6 +3576,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); + /* Advance cwnd if state allows */ + if (tcp_may_raise_cwnd(sk, flag)) + tcp_cong_avoid(sk, ack, acked); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) @@ -3948,7 +3953,6 @@ void tcp_reset(struct sock *sk) static void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - const struct dst_entry *dst; inet_csk_schedule_ack(sk); @@ -3960,9 +3964,7 @@ static void tcp_fin(struct sock *sk) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - dst = __sk_dst_get(sk); - if (!dst || !dst_metric(dst, RTAX_QUICKACK)) - inet_csk(sk)->icsk_ack.pingpong = 1; + inet_csk(sk)->icsk_ack.pingpong = 1; break; case TCP_CLOSE_WAIT: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d7d4c2b79cf2..486ba96ae91a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1683,8 +1683,7 @@ do_time_wait: iph->daddr, th->dest, inet_iif(skb)); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk)); - inet_twsk_put(inet_twsk(sk)); + inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; goto process; } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index a51d63a43e33..b3d64f61d922 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -461,7 +461,7 @@ void tcp_update_metrics(struct sock *sk) tcp_metric_set(tm, TCP_METRIC_CWND, tp->snd_cwnd); } - } else if (tp->snd_cwnd > tp->snd_ssthresh && + } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4bc00cb79e60..6d8795b066ac 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -147,8 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (!th->fin || TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: - inet_twsk_deschedule(tw); - inet_twsk_put(tw); + inet_twsk_deschedule_put(tw); return TCP_TW_RST; } @@ -198,8 +197,7 @@ kill_with_rst: */ if (sysctl_tcp_rfc1337 == 0) { kill: - inet_twsk_deschedule(tw); - inet_twsk_put(tw); + inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b1c218df2c85..71057849593a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -163,7 +163,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp, { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; - const struct dst_entry *dst = __sk_dst_get(sk); if (sysctl_tcp_slow_start_after_idle && (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) @@ -174,9 +173,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp, /* If it is a reply for ato after last received * packet, enter pingpong mode. */ - if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato && - (!dst || !dst_metric(dst, RTAX_QUICKACK))) - icsk->icsk_ack.pingpong = 1; + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + icsk->icsk_ack.pingpong = 1; } /* Account for an ACK we sent. */ diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 333bcb2415ff..bf5ea9e9bbc1 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -22,7 +22,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5b752f58a900..7149ebc820c7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -649,4 +649,3 @@ void tcp_init_xmit_timers(struct sock *sk) inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); } -EXPORT_SYMBOL(tcp_init_xmit_timers); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a6cea1d5e20d..13951c4087d4 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -225,7 +225,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) */ diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; - if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { + if (diff > gamma && tcp_in_slow_start(tp)) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ @@ -240,7 +240,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); - } else if (tp->snd_cwnd <= tp->snd_ssthresh) { + } else if (tcp_in_slow_start(tp)) { /* Slow start. */ tcp_slow_start(tp, acked); } else { @@ -281,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) vegas->minRTT = 0x7fffffff; } /* Use normal slow start */ - else if (tp->snd_cwnd <= tp->snd_ssthresh) + else if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 112151eeee45..0d094b995cd9 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -150,7 +150,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; - if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (tcp_in_slow_start(tp)) { /* Slow start. */ tcp_slow_start(tp, acked); } else { diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 438a73aa777c..643f61339e7b 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -5,16 +5,15 @@ # IPv6 as module will cause a CRASH if you try to unload it menuconfig IPV6 tristate "The IPv6 protocol" - default m + default y ---help--- - This is complemental support for the IP version 6. - You will still be able to do traditional IPv4 networking as well. + Support for IP version 6 (IPv6). For general information about IPv6, see <https://en.wikipedia.org/wiki/IPv6>. - For Linux IPv6 development information, see <http://www.linux-ipv6.org>. - For specific information about IPv6 under Linux, read the HOWTO at - <http://www.bieringer.de/linux/IPv6/>. + For specific information about IPv6 under Linux, see + Documentation/networking/ipv6.txt and read the HOWTO at + <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/> To compile this protocol support as a module, choose M here: the module will be called ipv6. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 21c2c818df3b..4ab74d56f65a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1358,15 +1358,94 @@ out: return ret; } +static void __ipv6_dev_get_saddr(struct net *net, + struct ipv6_saddr_dst *dst, + unsigned int prefs, + const struct in6_addr *saddr, + struct inet6_dev *idev, + struct ipv6_saddr_score *scores) +{ + struct ipv6_saddr_score *score = &scores[0], *hiscore = &scores[1]; + + read_lock_bh(&idev->lock); + list_for_each_entry(score->ifa, &idev->addr_list, if_list) { + int i; + + /* + * - Tentative Address (RFC2462 section 5.4) + * - A tentative address is not considered + * "assigned to an interface" in the traditional + * sense, unless it is also flagged as optimistic. + * - Candidate Source Address (section 4) + * - In any case, anycast addresses, multicast + * addresses, and the unspecified address MUST + * NOT be included in a candidate set. + */ + if ((score->ifa->flags & IFA_F_TENTATIVE) && + (!(score->ifa->flags & IFA_F_OPTIMISTIC))) + continue; + + score->addr_type = __ipv6_addr_type(&score->ifa->addr); + + if (unlikely(score->addr_type == IPV6_ADDR_ANY || + score->addr_type & IPV6_ADDR_MULTICAST)) { + net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", + idev->dev->name); + continue; + } + + score->rule = -1; + bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); + + for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { + int minihiscore, miniscore; + + minihiscore = ipv6_get_saddr_eval(net, hiscore, dst, i); + miniscore = ipv6_get_saddr_eval(net, score, dst, i); + + if (minihiscore > miniscore) { + if (i == IPV6_SADDR_RULE_SCOPE && + score->scopedist > 0) { + /* + * special case: + * each remaining entry + * has too small (not enough) + * scope, because ifa entries + * are sorted by their scope + * values. + */ + goto out; + } + break; + } else if (minihiscore < miniscore) { + if (hiscore->ifa) + in6_ifa_put(hiscore->ifa); + + in6_ifa_hold(score->ifa); + + swap(hiscore, score); + + /* restore our iterator */ + score->ifa = hiscore->ifa; + + break; + } + } + } +out: + read_unlock_bh(&idev->lock); +} + int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { - struct ipv6_saddr_score scores[2], - *score = &scores[0], *hiscore = &scores[1]; + struct ipv6_saddr_score scores[2], *hiscore = &scores[1]; struct ipv6_saddr_dst dst; + struct inet6_dev *idev; struct net_device *dev; int dst_type; + bool use_oif_addr = false; dst_type = __ipv6_addr_type(daddr); dst.addr = daddr; @@ -1380,97 +1459,35 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - struct inet6_dev *idev; - - /* Candidate Source Address (section 4) - * - multicast and link-local destination address, - * the set of candidate source address MUST only - * include addresses assigned to interfaces - * belonging to the same link as the outgoing - * interface. - * (- For site-local destination addresses, the - * set of candidate source addresses MUST only - * include addresses assigned to interfaces - * belonging to the same site as the outgoing - * interface.) - */ - if (((dst_type & IPV6_ADDR_MULTICAST) || - dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && - dst.ifindex && dev->ifindex != dst.ifindex) - continue; - - idev = __in6_dev_get(dev); - if (!idev) - continue; - - read_lock_bh(&idev->lock); - list_for_each_entry(score->ifa, &idev->addr_list, if_list) { - int i; - - /* - * - Tentative Address (RFC2462 section 5.4) - * - A tentative address is not considered - * "assigned to an interface" in the traditional - * sense, unless it is also flagged as optimistic. - * - Candidate Source Address (section 4) - * - In any case, anycast addresses, multicast - * addresses, and the unspecified address MUST - * NOT be included in a candidate set. - */ - if ((score->ifa->flags & IFA_F_TENTATIVE) && - (!(score->ifa->flags & IFA_F_OPTIMISTIC))) - continue; - - score->addr_type = __ipv6_addr_type(&score->ifa->addr); + /* Candidate Source Address (section 4) + * - multicast and link-local destination address, + * the set of candidate source address MUST only + * include addresses assigned to interfaces + * belonging to the same link as the outgoing + * interface. + * (- For site-local destination addresses, the + * set of candidate source addresses MUST only + * include addresses assigned to interfaces + * belonging to the same site as the outgoing + * interface.) + */ + if (dst_dev) { + if ((dst_type & IPV6_ADDR_MULTICAST) || + dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) { + idev = __in6_dev_get(dst_dev); + use_oif_addr = true; + } + } - if (unlikely(score->addr_type == IPV6_ADDR_ANY || - score->addr_type & IPV6_ADDR_MULTICAST)) { - net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", - dev->name); + if (use_oif_addr) { + __ipv6_dev_get_saddr(net, &dst, prefs, saddr, idev, scores); + } else { + for_each_netdev_rcu(net, dev) { + idev = __in6_dev_get(dev); + if (!idev) continue; - } - - score->rule = -1; - bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); - - for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { - int minihiscore, miniscore; - - minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i); - miniscore = ipv6_get_saddr_eval(net, score, &dst, i); - - if (minihiscore > miniscore) { - if (i == IPV6_SADDR_RULE_SCOPE && - score->scopedist > 0) { - /* - * special case: - * each remaining entry - * has too small (not enough) - * scope, because ifa entries - * are sorted by their scope - * values. - */ - goto try_nextdev; - } - break; - } else if (minihiscore < miniscore) { - if (hiscore->ifa) - in6_ifa_put(hiscore->ifa); - - in6_ifa_hold(score->ifa); - - swap(hiscore, score); - - /* restore our iterator */ - score->ifa = hiscore->ifa; - - break; - } - } + __ipv6_dev_get_saddr(net, &dst, prefs, saddr, idev, scores); } -try_nextdev: - read_unlock_bh(&idev->lock); } rcu_read_unlock(); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7de52b65173f..7bc92ea4ae8f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -342,7 +342,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (!(inet->freebind || inet->transparent) && + if (!net->ipv6.sysctl.ip_nonlocal_bind && + !(inet->freebind || inet->transparent) && !ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { err = -EADDRNOTAVAIL; @@ -679,8 +680,8 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct ipv6_pinfo *np = inet6_sk(sk); if (np->rxopt.all) { - if ((opt->hop && (np->rxopt.bits.hopopts || - np->rxopt.bits.ohopopts)) || + if (((opt->flags & IP6SKB_HOPBYHOP) && + (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 62d908e64eeb..50115522e80f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -558,8 +558,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } /* HbH is allowed only once */ - if (np->rxopt.bits.hopopts && opt->hop) { - u8 *ptr = nh + opt->hop; + if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) { + u8 *ptr = nh + sizeof(struct ipv6hdr); put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); } @@ -620,8 +620,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, int hlim = ipv6_hdr(skb)->hop_limit; put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } - if (np->rxopt.bits.ohopopts && opt->hop) { - u8 *ptr = nh + opt->hop; + if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) { + u8 *ptr = nh + sizeof(struct ipv6hdr); put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); } if (np->rxopt.bits.odstopts && opt->dst0) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a7bbbe45570b..ce203b0402be 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -632,7 +632,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb) return -1; } - opt->hop = sizeof(struct ipv6hdr); + opt->flags |= IP6SKB_HOPBYHOP; if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b4fd96de97e6..6ac8dad0138a 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -207,7 +207,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; - int twrefcnt = 0; spin_lock(lock); @@ -234,21 +233,17 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { - twrefcnt = inet_twsk_unhash(tw); + sk_nulls_del_node_init_rcu((struct sock *)tw); NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); - if (twrefcnt) - inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw); - - inet_twsk_put(tw); + inet_twsk_deschedule_put(tw); } return 0; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ca4700cb26c4..fdbada1569a3 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -295,7 +295,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) * unspecified and mapped address have a v4 equivalent. */ v4addr = LOOPBACK4_IPV6; - if (!(addr_type & IPV6_ADDR_MULTICAST)) { + if (!(addr_type & IPV6_ADDR_MULTICAST) && + !sock_net(sk)->ipv6.sysctl.ip_nonlocal_bind) { err = -EADDRNOTAVAIL; if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 4e705add4f18..db48aebd9c47 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -75,6 +75,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ip_nonlocal_bind", + .data = &init_net.ipv6.sysctl.ip_nonlocal_bind, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -117,6 +124,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; + ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6748c4277aff..d540846a1a79 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1481,8 +1481,7 @@ do_time_wait: ntohs(th->dest), tcp_v6_iif(skb)); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); - inet_twsk_deschedule(tw); - inet_twsk_put(tw); + inet_twsk_deschedule_put(tw); sk = sk2; tcp_v6_restore_cb(skb); goto process; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index cca96cec1b68..d0c96c5ae29a 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -272,8 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk)); - inet_twsk_put(inet_twsk(sk)); + inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } @@ -437,8 +436,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, tgi->lport ? tgi->lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk)); - inet_twsk_put(inet_twsk(sk)); + inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index af427a3dbcba..074a32f466f8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -27,6 +27,15 @@ #include <net/act_api.h> #include <net/netlink.h> +static void free_tcf(struct rcu_head *head) +{ + struct tcf_common *p = container_of(head, struct tcf_common, tcfc_rcu); + + free_percpu(p->cpu_bstats); + free_percpu(p->cpu_qstats); + kfree(p); +} + void tcf_hash_destroy(struct tc_action *a) { struct tcf_common *p = a->priv; @@ -41,7 +50,7 @@ void tcf_hash_destroy(struct tc_action *a) * gen_estimator est_timer() might access p->tcfc_lock * or bstats, wait a RCU grace period before freeing p */ - kfree_rcu(p, tcfc_rcu); + call_rcu(&p->tcfc_rcu, free_tcf); } EXPORT_SYMBOL(tcf_hash_destroy); @@ -230,15 +239,16 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) if (est) gen_kill_estimator(&pc->tcfc_bstats, &pc->tcfc_rate_est); - kfree_rcu(pc, tcfc_rcu); + call_rcu(&pc->tcfc_rcu, free_tcf); } EXPORT_SYMBOL(tcf_hash_cleanup); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, - int size, int bind) + int size, int bind, bool cpustats) { struct tcf_hashinfo *hinfo = a->ops->hinfo; struct tcf_common *p = kzalloc(size, GFP_KERNEL); + int err = -ENOMEM; if (unlikely(!p)) return -ENOMEM; @@ -246,18 +256,32 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, if (bind) p->tcfc_bindcnt = 1; + if (cpustats) { + p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + if (!p->cpu_bstats) { +err1: + kfree(p); + return err; + } + p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); + if (!p->cpu_qstats) { +err2: + free_percpu(p->cpu_bstats); + goto err1; + } + } spin_lock_init(&p->tcfc_lock); INIT_HLIST_NODE(&p->tcfc_head); p->tcfc_index = index ? index : tcf_hash_new_index(hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; if (est) { - int err = gen_new_estimator(&p->tcfc_bstats, NULL, - &p->tcfc_rate_est, - &p->tcfc_lock, est); + err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats, + &p->tcfc_rate_est, + &p->tcfc_lock, est); if (err) { - kfree(p); - return err; + free_percpu(p->cpu_qstats); + goto err2; } } @@ -615,10 +639,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (err < 0) goto errout; - if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 || + if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfc_bstats, &p->tcfc_rate_est) < 0 || - gnet_stats_copy_queue(&d, NULL, + gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfc_qstats, p->tcfc_qstats.qlen) < 0) goto errout; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1d56903fd4c7..99aa271633e9 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -281,7 +281,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, act, bind)) { ret = tcf_hash_create(parm->index, est, act, - sizeof(*prog), bind); + sizeof(*prog), bind, false); if (ret < 0) goto destroy_fp; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 295d14bd6c67..f2b540220ad0 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -108,7 +108,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_CONNMARK_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), + bind, false); if (ret) return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4cd5cf1aedf8..b07c535ba8e7 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -62,7 +62,8 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, parm = nla_data(tb[TCA_CSUM_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), + bind, false); if (ret) return ret; ret = ACT_P_CREATED; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 7fffc2272701..5c1b05170736 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -28,14 +28,18 @@ #ifdef CONFIG_GACT_PROB static int gact_net_rand(struct tcf_gact *gact) { - if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval) + smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ + if (prandom_u32() % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } static int gact_determ(struct tcf_gact *gact) { - if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval) + u32 pack = atomic_inc_return(&gact->packets); + + smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ + if (pack % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } @@ -85,7 +89,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #endif if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), + bind, true); if (ret) return ret; ret = ACT_P_CREATED; @@ -99,16 +104,19 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, gact = to_gact(a); - spin_lock_bh(&gact->tcf_lock); + ASSERT_RTNL(); gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB if (p_parm) { gact->tcfg_paction = p_parm->paction; - gact->tcfg_pval = p_parm->pval; + gact->tcfg_pval = max_t(u16, 1, p_parm->pval); + /* Make sure tcfg_pval is written before tcfg_ptype + * coupled with smp_rmb() in gact_net_rand() & gact_determ() + */ + smp_wmb(); gact->tcfg_ptype = p_parm->ptype; } #endif - spin_unlock_bh(&gact->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(a); return ret; @@ -118,23 +126,21 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_gact *gact = a->priv; - int action = TC_ACT_SHOT; + int action = READ_ONCE(gact->tcf_action); - spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (gact->tcfg_ptype) - action = gact_rand[gact->tcfg_ptype](gact); - else - action = gact->tcf_action; -#else - action = gact->tcf_action; + { + u32 ptype = READ_ONCE(gact->tcfg_ptype); + + if (ptype) + action = gact_rand[ptype](gact); + } #endif - gact->tcf_bstats.bytes += qdisc_pkt_len(skb); - gact->tcf_bstats.packets++; + bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb); if (action == TC_ACT_SHOT) - gact->tcf_qstats.drops++; - gact->tcf_tm.lastuse = jiffies; - spin_unlock(&gact->tcf_lock); + qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats)); + + tcf_lastuse_update(&gact->tcf_tm); return action; } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index cbc8dd7dd48a..99c9cc1c7af9 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -114,7 +114,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, index = nla_get_u32(tb[TCA_IPT_INDEX]); if (!tcf_hash_check(index, a, bind) ) { - ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind); + ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false); if (ret) return ret; ret = ACT_P_CREATED; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index a42a3b257226..19cd8904efa0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -35,9 +35,11 @@ static LIST_HEAD(mirred_list); static void tcf_mirred_release(struct tc_action *a, int bind) { struct tcf_mirred *m = to_mirred(a); + struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1); + list_del(&m->tcfm_list); - if (m->tcfm_dev) - dev_put(m->tcfm_dev); + if (dev) + dev_put(dev); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -93,7 +95,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, a, bind)) { if (dev == NULL) return -EINVAL; - ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*m), + bind, true); if (ret) return ret; ret = ACT_P_CREATED; @@ -105,18 +108,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } m = to_mirred(a); - spin_lock_bh(&m->tcf_lock); + ASSERT_RTNL(); m->tcf_action = parm->action; m->tcfm_eaction = parm->eaction; if (dev != NULL) { m->tcfm_ifindex = parm->ifindex; if (ret != ACT_P_CREATED) - dev_put(m->tcfm_dev); + dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); - m->tcfm_dev = dev; + rcu_assign_pointer(m->tcfm_dev, dev); m->tcfm_ok_push = ok_push; } - spin_unlock_bh(&m->tcf_lock); + if (ret == ACT_P_CREATED) { list_add(&m->tcfm_list, &mirred_list); tcf_hash_insert(a); @@ -131,20 +134,22 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, struct tcf_mirred *m = a->priv; struct net_device *dev; struct sk_buff *skb2; + int retval, err; u32 at; - int retval, err = 1; - spin_lock(&m->tcf_lock); - m->tcf_tm.lastuse = jiffies; - bstats_update(&m->tcf_bstats, skb); + tcf_lastuse_update(&m->tcf_tm); + + bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); - dev = m->tcfm_dev; - if (!dev) { - printk_once(KERN_NOTICE "tc mirred: target device is gone\n"); + rcu_read_lock(); + retval = READ_ONCE(m->tcf_action); + dev = rcu_dereference(m->tcfm_dev); + if (unlikely(!dev)) { + pr_notice_once("tc mirred: target device is gone\n"); goto out; } - if (!(dev->flags & IFF_UP)) { + if (unlikely(!(dev->flags & IFF_UP))) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); goto out; @@ -152,7 +157,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, at = G_TC_AT(skb->tc_verd); skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) + if (!skb2) goto out; if (!(at & AT_EGRESS)) { @@ -168,16 +173,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->dev = dev; err = dev_queue_xmit(skb2); -out: if (err) { - m->tcf_qstats.overlimits++; +out: + qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats)); if (m->tcfm_eaction != TCA_EGRESS_MIRROR) retval = TC_ACT_SHOT; - else - retval = m->tcf_action; - } else - retval = m->tcf_action; - spin_unlock(&m->tcf_lock); + } + rcu_read_unlock(); return retval; } @@ -216,14 +218,16 @@ static int mirred_device_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; + ASSERT_RTNL(); if (event == NETDEV_UNREGISTER) list_for_each_entry(m, &mirred_list, tcfm_list) { - spin_lock_bh(&m->tcf_lock); - if (m->tcfm_dev == dev) { + if (rcu_access_pointer(m->tcfm_dev) == dev) { dev_put(dev); - m->tcfm_dev = NULL; + /* Note : no rcu grace period necessary, as + * net_device are already rcu protected. + */ + RCU_INIT_POINTER(m->tcfm_dev, NULL); } - spin_unlock_bh(&m->tcf_lock); } return NOTIFY_DONE; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 270a030d5fd0..5be0b3c1c5b0 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -55,7 +55,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, parm = nla_data(tb[TCA_NAT_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), + bind, false); if (ret) return ret; ret = ACT_P_CREATED; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 17e6d6669c7f..ce8676ad892f 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -57,7 +57,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; - ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), + bind, false); if (ret) return ret; p = to_pedit(a); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 6a8d9488613a..d6b708d6afdf 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -103,7 +103,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, defdata = nla_data(tb[TCA_DEF_DATA]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*d), + bind, false); if (ret) return ret; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fcfeeaf838be..6751b5f8c046 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -99,7 +99,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SKBEDIT_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*d), + bind, false); if (ret) return ret; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index d735ecf0b1a7..796785e0bf96 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -116,7 +116,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, action = parm->v_action; if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*v), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*v), + bind, false); if (ret) return ret; diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index bdf1c1607b80..c77c872fe8ee 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -60,4 +60,29 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +#if defined(__x86_64__) + +#define PT_REGS_PARM1(x) ((x)->di) +#define PT_REGS_PARM2(x) ((x)->si) +#define PT_REGS_PARM3(x) ((x)->dx) +#define PT_REGS_PARM4(x) ((x)->cx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->sp) +#define PT_REGS_FP(x) ((x)->bp) +#define PT_REGS_RC(x) ((x)->ax) +#define PT_REGS_SP(x) ((x)->sp) + +#elif defined(__s390x__) + +#define PT_REGS_PARM1(x) ((x)->gprs[2]) +#define PT_REGS_PARM2(x) ((x)->gprs[3]) +#define PT_REGS_PARM3(x) ((x)->gprs[4]) +#define PT_REGS_PARM4(x) ((x)->gprs[5]) +#define PT_REGS_PARM5(x) ((x)->gprs[6]) +#define PT_REGS_RET(x) ((x)->gprs[14]) +#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->gprs[2]) +#define PT_REGS_SP(x) ((x)->gprs[15]) + +#endif #endif diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c index 31620463701a..3f450a8fa1f3 100644 --- a/samples/bpf/tracex1_kern.c +++ b/samples/bpf/tracex1_kern.c @@ -29,7 +29,7 @@ int bpf_prog1(struct pt_regs *ctx) int len; /* non-portable! works for the given kernel only */ - skb = (struct sk_buff *) ctx->di; + skb = (struct sk_buff *) PT_REGS_PARM1(ctx); dev = _(skb->dev); diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index dc50f4f2943f..b32367cfbff4 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -27,10 +27,10 @@ int bpf_prog2(struct pt_regs *ctx) long init_val = 1; long *value; - /* x64 specific: read ip of kfree_skb caller. + /* x64/s390x specific: read ip of kfree_skb caller. * non-portable version of __builtin_return_address(0) */ - bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp); + bpf_probe_read(&loc, sizeof(loc), (void *)PT_REGS_RET(ctx)); value = bpf_map_lookup_elem(&my_map, &loc); if (value) @@ -79,7 +79,7 @@ struct bpf_map_def SEC("maps") my_hist_map = { SEC("kprobe/sys_write") int bpf_prog3(struct pt_regs *ctx) { - long write_size = ctx->dx; /* arg3 */ + long write_size = PT_REGS_PARM3(ctx); long init_val = 1; long *value; struct hist_key key = {}; diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c index 255ff2792366..bf337fbb0947 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3_kern.c @@ -23,7 +23,7 @@ struct bpf_map_def SEC("maps") my_map = { SEC("kprobe/blk_mq_start_request") int bpf_prog1(struct pt_regs *ctx) { - long rq = ctx->di; + long rq = PT_REGS_PARM1(ctx); u64 val = bpf_ktime_get_ns(); bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY); @@ -51,7 +51,7 @@ struct bpf_map_def SEC("maps") lat_map = { SEC("kprobe/blk_update_request") int bpf_prog2(struct pt_regs *ctx) { - long rq = ctx->di; + long rq = PT_REGS_PARM1(ctx); u64 *value, l, base; u32 index; diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c index 126b80512228..ac4671420cf1 100644 --- a/samples/bpf/tracex4_kern.c +++ b/samples/bpf/tracex4_kern.c @@ -27,7 +27,7 @@ struct bpf_map_def SEC("maps") my_map = { SEC("kprobe/kmem_cache_free") int bpf_prog1(struct pt_regs *ctx) { - long ptr = ctx->si; + long ptr = PT_REGS_PARM2(ctx); bpf_map_delete_elem(&my_map, &ptr); return 0; @@ -36,11 +36,11 @@ int bpf_prog1(struct pt_regs *ctx) SEC("kretprobe/kmem_cache_alloc_node") int bpf_prog2(struct pt_regs *ctx) { - long ptr = ctx->ax; + long ptr = PT_REGS_RC(ctx); long ip = 0; /* get ip address of kmem_cache_alloc_node() caller */ - bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip))); + bpf_probe_read(&ip, sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); struct pair v = { .val = bpf_ktime_get_ns(), diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index b71fe07a7a7a..b3f4295bf288 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -24,7 +24,7 @@ int bpf_prog1(struct pt_regs *ctx) { struct seccomp_data sd = {}; - bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); /* dispatch into next BPF program depending on syscall number */ bpf_tail_call(ctx, &progs, sd.nr); @@ -42,7 +42,7 @@ PROG(__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd = {}; - bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); if (sd.args[2] == 512) { char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), @@ -55,7 +55,7 @@ PROG(__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd = {}; - bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), |