diff options
50 files changed, 2799 insertions, 303 deletions
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt index f1c441bedf68..929591d52ed6 100644 --- a/Documentation/devicetree/bindings/net/sff,sfp.txt +++ b/Documentation/devicetree/bindings/net/sff,sfp.txt @@ -33,6 +33,10 @@ Optional Properties: Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high: high Tx rate. Must not be present for SFF modules +- maximum-power-milliwatt : Maximum module power consumption + Specifies the maximum power consumption allowable by a module in the + slot, in milli-Watts. Presently, modules can be up to 1W, 1.5W or 2W. + Example #1: Direct serdes to SFP connection sfp_eth3: sfp-eth3 { @@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 { i2c-bus = <&sfp_1g_i2c>; los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>; mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>; + maximum-power-milliwatt = <1000>; pinctrl-names = "default"; pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>; tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index f17a160dbff2..137cbb470af2 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev) dev->netdev_ops = &mace_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n", - dev->name, dev->dev_addr); + pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n", + dev->dev_addr, mp->chipid); err = register_netdev(dev); if (!err) @@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) else if (fs & (UFLO|LCOL|RTRY)) { ++dev->stats.tx_aborted_errors; if (mb->xmtfs & UFLO) { - printk(KERN_ERR "%s: DMA underrun.\n", dev->name); dev->stats.tx_fifo_errors++; mace_txdma_reset(dev); } @@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf) if 
(frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) { dev->stats.rx_errors++; - if (frame_status & RS_OFLO) { - printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name); + if (frame_status & RS_OFLO) dev->stats.rx_fifo_errors++; - } if (frame_status & RS_CLSN) dev->stats.collisions++; if (frame_status & RS_FRAMERR) @@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = { }, }; -static int __init mac_mace_init_module(void) -{ - if (!MACH_IS_MAC) - return -ENODEV; - - return platform_driver_register(&mac_mace_driver); -} - -static void __exit mac_mace_cleanup_module(void) -{ - platform_driver_unregister(&mac_mace_driver); -} - -module_init(mac_mace_init_module); -module_exit(mac_mace_cleanup_module); +module_platform_driver(mac_mace_driver); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 1a49297224ed..ff92ab1daeb8 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -19,7 +19,7 @@ #include "be.h" #include "be_cmds.h" -char *be_misconfig_evt_port_state[] = { +const char * const be_misconfig_evt_port_state[] = { "Physical Link is functional", "Optics faulted/incorrectly installed/not installed - Reseat optics. 
If issue not resolved, replace.", "Optics of two types installed – Remove one optic or install matching pair of optics.", diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 09da2d82c2f0..e8b43cf44b6f 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -201,7 +201,7 @@ enum { phy_state == BE_PHY_UNQUALIFIED || \ phy_state == BE_PHY_UNCERTIFIED) -extern char *be_misconfig_evt_port_state[]; +extern const char * const be_misconfig_evt_port_state[]; /* async event indicating misconfigured port */ struct be_async_event_misconfig_port { diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 5a1668cdb461..9418f6eed086 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -65,6 +65,10 @@ #define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000 #define MVPP2_RXQ_DISABLE_MASK BIT(31) +/* Top Registers */ +#define MVPP2_MH_REG(port) (0x5040 + 4 * (port)) +#define MVPP2_DSA_EXTENDED BIT(5) + /* Parser Registers */ #define MVPP2_PRS_INIT_LOOKUP_REG 0x1000 #define MVPP2_PRS_PORT_LU_MAX 0xf @@ -473,6 +477,7 @@ #define MVPP2_ETH_TYPE_LEN 2 #define MVPP2_PPPOE_HDR_SIZE 8 #define MVPP2_VLAN_TAG_LEN 4 +#define MVPP2_VLAN_TAG_EDSA_LEN 8 /* Lbtd 802.3 type */ #define MVPP2_IP_LBDT_TYPE 0xfffa @@ -609,35 +614,64 @@ enum mvpp2_tag_type { #define MVPP2_PRS_TCAM_LU_BYTE 20 #define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2) #define MVPP2_PRS_TCAM_INV_WORD 5 + +#define MVPP2_PRS_VID_TCAM_BYTE 2 + +/* There is a TCAM range reserved for VLAN filtering entries, range size is 33 + * 10 VLAN ID filter entries per port + * 1 default VLAN filter entry per port + * It is assumed that there are 3 ports for filter, not including loopback port + */ +#define MVPP2_PRS_VLAN_FILT_MAX 11 +#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33 + +#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 2) +#define 
MVPP2_PRS_VLAN_FILT_DFLT_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 1) + /* Tcam entries ID */ #define MVPP2_PE_DROP_ALL 0 #define MVPP2_PE_FIRST_FREE_TID 1 -#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31) + +/* VLAN filtering range */ +#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31) +#define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \ + MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1) +#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1) #define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30) #define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29) #define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28) #define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27) #define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26) -#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19) -#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18) -#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17) -#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16) -#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15) -#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14) -#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13) -#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12) -#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11) -#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10) -#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9) -#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8) -#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7) -#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6) +#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 21) +#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 20) +#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 19) +#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18) +#define MVPP2_PE_DSA_UNTAGGED 
(MVPP2_PRS_TCAM_SRAM_SIZE - 17) +#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16) +#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15) +#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14) +#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13) +#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 12) +#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 11) +#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 10) +#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 9) +#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8) +#define MVPP2_PE_VID_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7) +#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 6) #define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5) #define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4) #define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3) #define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2) #define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1) +#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \ + ((port) * MVPP2_PRS_VLAN_FILT_MAX)) +#define MVPP2_PRS_VID_PORT_LAST(port) (MVPP2_PRS_VID_PORT_FIRST(port) \ + + MVPP2_PRS_VLAN_FILT_MAX_ENTRY) +/* Index of default vid filter for given port */ +#define MVPP2_PRS_VID_PORT_DFLT(port) (MVPP2_PRS_VID_PORT_FIRST(port) \ + + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY) + /* Sram structure * The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0). 
*/ @@ -725,6 +759,7 @@ enum mvpp2_tag_type { #define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4) #define MVPP2_PRS_SINGLE_VLAN_AI 0 #define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7) +#define MVPP2_PRS_EDSA_VID_AI_BIT BIT(0) /* DSA/EDSA type */ #define MVPP2_PRS_TAGGED true @@ -747,6 +782,7 @@ enum mvpp2_prs_lookup { MVPP2_PRS_LU_MAC, MVPP2_PRS_LU_DSA, MVPP2_PRS_LU_VLAN, + MVPP2_PRS_LU_VID, MVPP2_PRS_LU_L2, MVPP2_PRS_LU_PPPOE, MVPP2_PRS_LU_IP4, @@ -1662,6 +1698,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset, mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff); } +/* Set vid in tcam sw entry */ +static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset, + unsigned short vid) +{ + mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf); + mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff); +} + /* Set bits in sram sw entry */ static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num, int val) @@ -2029,24 +2073,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add, mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA); pe.index = tid; - /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/ - mvpp2_prs_sram_shift_set(&pe, shift, - MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); - /* Update shadow table */ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA); if (tagged) { /* Set tagged bit in DSA tag */ mvpp2_prs_tcam_data_byte_set(&pe, 0, - MVPP2_PRS_TCAM_DSA_TAGGED_BIT, - MVPP2_PRS_TCAM_DSA_TAGGED_BIT); - /* Clear all ai bits for next iteration */ - mvpp2_prs_sram_ai_update(&pe, 0, - MVPP2_PRS_SRAM_AI_MASK); - /* If packet is tagged continue check vlans */ - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN); + MVPP2_PRS_TCAM_DSA_TAGGED_BIT, + MVPP2_PRS_TCAM_DSA_TAGGED_BIT); + + /* Set ai bits for next iteration */ + if (extend) + mvpp2_prs_sram_ai_update(&pe, 1, + MVPP2_PRS_SRAM_AI_MASK); + else + mvpp2_prs_sram_ai_update(&pe, 0, + MVPP2_PRS_SRAM_AI_MASK); + + /* If 
packet is tagged continue check vid filtering */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID); } else { + /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/ + mvpp2_prs_sram_shift_set(&pe, shift, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set result info bits to 'no vlans' */ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE, MVPP2_PRS_RI_VLAN_MASK); @@ -2231,10 +2281,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, mvpp2_prs_match_etype(pe, 0, tpid); - mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2); - /* Shift 4 bytes - skip 1 vlan tag */ - mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN, - MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* VLAN tag detected, proceed with VID filtering */ + mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID); + /* Clear all ai bits for next iteration */ mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK); @@ -2375,8 +2424,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, mvpp2_prs_match_etype(pe, 4, tpid2); mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN); - /* Shift 8 bytes - skip 2 vlan tags */ - mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN, + /* Shift 4 bytes - skip outer vlan tag */ + mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE, MVPP2_PRS_RI_VLAN_MASK); @@ -2755,6 +2804,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv) mvpp2_prs_hw_write(priv, &pe); } +/* Initialize parser entries for VID filtering */ +static void mvpp2_prs_vid_init(struct mvpp2 *priv) +{ + struct mvpp2_prs_entry pe; + + memset(&pe, 0, sizeof(pe)); + + /* Set default vid entry */ + pe.index = MVPP2_PE_VID_FLTR_DEFAULT; + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); + + mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT); + + /* Skip VLAN header - Set offset to 4 bytes */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + /* Clear all 
ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); + mvpp2_prs_hw_write(priv, &pe); + + /* Set default vid entry for extended DSA*/ + memset(&pe, 0, sizeof(pe)); + + /* Set default vid entry */ + pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT; + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); + + mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT, + MVPP2_PRS_EDSA_VID_AI_BIT); + + /* Skip VLAN header - Set offset to 8 bytes */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + /* Clear all ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); + mvpp2_prs_hw_write(priv, &pe); +} + /* Match basic ethertypes */ static int mvpp2_prs_etype_init(struct mvpp2 *priv) { @@ -3023,7 +3128,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN); pe.index = MVPP2_PE_VLAN_DBL; - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID); + /* Clear ai for next iterations */ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE, @@ -3386,6 +3492,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv) return 0; } +/* Find tcam entry with matched pair <vid,port> */ +static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid, + u16 mask) +{ + unsigned char byte[2], enable[2]; + struct mvpp2_prs_entry pe; + 
u16 rvid, rmask; + int tid; + + /* Go through all the entries with MVPP2_PRS_LU_VID */ + for (tid = MVPP2_PE_VID_FILT_RANGE_START; + tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) { + if (!priv->prs_shadow[tid].valid || + priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID) + continue; + + pe.index = tid; + + mvpp2_prs_hw_read(priv, &pe); + mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]); + mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]); + + rvid = ((byte[0] & 0xf) << 8) + byte[1]; + rmask = ((enable[0] & 0xf) << 8) + enable[1]; + + if (rvid != vid || rmask != mask) + continue; + + return tid; + } + + return 0; +} + +/* Write parser entry for VID filtering */ +static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) +{ + unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START + + port->id * MVPP2_PRS_VLAN_FILT_MAX; + unsigned int mask = 0xfff, reg_val, shift; + struct mvpp2 *priv = port->priv; + struct mvpp2_prs_entry pe; + int tid; + + /* Scan TCAM and see if entry with this <vid,port> already exists */ + tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask); + + reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id)); + if (reg_val & MVPP2_DSA_EXTENDED) + shift = MVPP2_VLAN_TAG_EDSA_LEN; + else + shift = MVPP2_VLAN_TAG_LEN; + + /* No such entry */ + if (!tid) { + memset(&pe, 0, sizeof(pe)); + + /* Go through all entries from first to last in vlan range */ + tid = mvpp2_prs_tcam_first_free(priv, vid_start, + vid_start + + MVPP2_PRS_VLAN_FILT_MAX_ENTRY); + + /* There isn't room for a new VID filter */ + if (tid < 0) + return tid; + + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); + pe.index = tid; + + /* Mask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, 0); + } else { + /* Reuse the existing entry: pe is not initialised on this path, + * so select the entry by index before reading it back, as + * mvpp2_prs_vid_range_find does + */ + pe.index = tid; + mvpp2_prs_hw_read(priv, &pe); + } + + /* Enable the current port */ + mvpp2_prs_tcam_port_set(&pe, port->id, true); + + /* Continue - set next lookup */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + + /* Skip VLAN header - Set offset to 4 or 8 bytes */ +
mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + /* Set match on VID */ + mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid); + + /* Clear all ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + /* Update shadow table */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* Remove parser entry for VID filtering */ +static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid) +{ + struct mvpp2 *priv = port->priv; + int tid; + + /* Scan TCAM and see if entry with this <vid,port> already exists */ + tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff); + + /* No such entry (mvpp2_prs_vid_range_find returns 0 when not found) */ + if (!tid) + return; + + mvpp2_prs_hw_inv(priv, tid); + priv->prs_shadow[tid].valid = false; +} + +/* Remove all existing VID filters on this port */ +static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port) +{ + struct mvpp2 *priv = port->priv; + int tid; + + for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id); + tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) { + if (priv->prs_shadow[tid].valid) { + /* tid is a TCAM index, not a VID: invalidate it directly */ + mvpp2_prs_hw_inv(priv, tid); + priv->prs_shadow[tid].valid = false; + } + } +} + +/* Remove VID filtering entry for this port */ +static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port) +{ + unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id); + struct mvpp2 *priv = port->priv; + + /* Invalidate the guard entry */ + mvpp2_prs_hw_inv(priv, tid); + + priv->prs_shadow[tid].valid = false; +} + +/* Add guard entry that drops packets when no VID is matched on this port */ +static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port) +{ + unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id); + struct mvpp2 *priv = port->priv; + unsigned int reg_val, shift; + struct mvpp2_prs_entry pe; + + if (priv->prs_shadow[tid].valid) + return; + + memset(&pe, 0, sizeof(pe)); + + pe.index = tid; + + reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id)); + if (reg_val &
MVPP2_DSA_EXTENDED) + shift = MVPP2_VLAN_TAG_EDSA_LEN; + else + shift = MVPP2_VLAN_TAG_LEN; + + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); + + /* Mask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, 0); + + /* Update port mask */ + mvpp2_prs_tcam_port_set(&pe, port->id, true); + + /* Continue - set next lookup */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + + /* Skip VLAN header - Set offset to 4 or 8 bytes */ + mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + /* Drop VLAN packets that don't belong to any VIDs on this port */ + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK, + MVPP2_PRS_RI_DROP_MASK); + + /* Clear all ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + /* Update shadow table */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); + mvpp2_prs_hw_write(priv, &pe); +} + /* Parser default initialization */ static int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv) @@ -3429,6 +3721,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev, mvpp2_prs_dsa_init(priv); + mvpp2_prs_vid_init(priv); + err = mvpp2_prs_etype_init(priv); if (err) return err; @@ -7153,6 +7447,12 @@ retry: } } } + + /* Disable VLAN filtering in promiscuous mode */ + if (dev->flags & IFF_PROMISC) + mvpp2_prs_vid_disable_filtering(port); + else + mvpp2_prs_vid_enable_filtering(port); } static int mvpp2_set_mac_address(struct net_device *dev, void *p) @@ -7292,6 +7592,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return ret; } +static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mvpp2_port *port = netdev_priv(dev); + int ret; + + ret = mvpp2_prs_vid_entry_add(port, vid); + if (ret) + netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n", + MVPP2_PRS_VLAN_FILT_MAX - 1); + return ret; +} + +static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 
proto, u16 vid) +{ + struct mvpp2_port *port = netdev_priv(dev); + + mvpp2_prs_vid_entry_remove(port, vid); + return 0; +} + +static int mvpp2_set_features(struct net_device *dev, + netdev_features_t features) +{ + netdev_features_t changed = dev->features ^ features; + struct mvpp2_port *port = netdev_priv(dev); + + if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) { + mvpp2_prs_vid_enable_filtering(port); + } else { + /* Invalidate all registered VID filters for this + * port + */ + mvpp2_prs_vid_remove_all(port); + + mvpp2_prs_vid_disable_filtering(port); + } + } + + return 0; +} + /* Ethtool methods */ /* Set interrupt coalescing for ethtools */ @@ -7433,6 +7775,9 @@ static const struct net_device_ops mvpp2_netdev_ops = { .ndo_change_mtu = mvpp2_change_mtu, .ndo_get_stats64 = mvpp2_get_stats64, .ndo_do_ioctl = mvpp2_ioctl, + .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid, + .ndo_set_features = mvpp2_set_features, }; static const struct ethtool_ops mvpp2_eth_tool_ops = { @@ -7945,7 +8290,8 @@ static int mvpp2_port_probe(struct platform_device *pdev, features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; dev->features = features | NETIF_F_RXCSUM; - dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO; + dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO | + NETIF_F_HW_VLAN_CTAG_FILTER; dev->vlan_features |= features; dev->gso_max_segs = MVPP2_MAX_TSO_SEGS; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8d2d140d7910..7c6204f701ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev, xstats->tail_drop[i] = mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl); } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_get_stats_raw(dev, 
MLXSW_REG_PPCNT_PRIO_CNT, + i, ppcnt_pl); + if (err) + continue; + + xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl); + xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl); + } } static void update_stats_cache(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 2673310f92da..d5e711d8ad71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -210,6 +210,8 @@ struct mlxsw_sp_port_xstats { u64 wred_drop[TC_MAX_QUEUE]; u64 tail_drop[TC_MAX_QUEUE]; u64 backlog[TC_MAX_QUEUE]; + u64 tx_bytes[IEEE_8021QAZ_MAX_TCS]; + u64 tx_packets[IEEE_8021QAZ_MAX_TCS]; }; struct mlxsw_sp_port { @@ -247,6 +249,7 @@ struct mlxsw_sp_port { struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; struct mlxsw_sp_qdisc *root_qdisc; + struct mlxsw_sp_qdisc *tclass_qdiscs; unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 0b7670459051..91262b0573e3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -42,6 +42,8 @@ #include "reg.h" #define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1) +#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \ + MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1)) enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, @@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops { struct mlxsw_sp_qdisc { u32 handle; u8 tclass_num; + u8 prio_bitmap; union { struct red_stats red; } xstats_base; @@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle, mlxsw_sp_qdisc->handle == handle; } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, + bool root_only) +{ + 
int tclass, child_index; + + if (parent == TC_H_ROOT) + return mlxsw_sp_port->root_qdisc; + + if (root_only || !mlxsw_sp_port->root_qdisc || + !mlxsw_sp_port->root_qdisc->ops || + TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle || + TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS) + return NULL; + + child_index = TC_H_MIN(parent); + tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); + return &mlxsw_sp_port->tclass_qdiscs[tclass]; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle) +{ + int i; + + if (mlxsw_sp_port->root_qdisc->handle == handle) + return mlxsw_sp_port->root_qdisc; + + if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC) + return NULL; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle) + return &mlxsw_sp_port->tclass_qdiscs[i]; + + return NULL; +} + static int mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) @@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static void +mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, + u8 prio_bitmap, u64 *tx_packets, + u64 *tx_bytes) +{ + int i; + + *tx_packets = 0; + *tx_bytes = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (prio_bitmap & BIT(i)) { + *tx_packets += xstats->tx_packets[i]; + *tx_bytes += xstats->tx_bytes[i]; + } + } +} + static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, @@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; struct red_stats *red_base; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; stats_base = 
&mlxsw_sp_qdisc->stats_base; red_base = &mlxsw_sp_qdisc->xstats_base.red; - stats_base->tx_packets = stats->tx_packets; - stats_base->tx_bytes = stats->tx_bytes; - + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &stats_base->tx_packets, + &stats_base->tx_bytes); red_base->prob_mark = xstats->ecn; red_base->prob_drop = xstats->wred_drop[tclass_num]; red_base->pdrop = xstats->tail_drop[tclass_num]; @@ -255,6 +312,12 @@ static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { + struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + + if (root_qdisc != mlxsw_sp_qdisc) + root_qdisc->stats_base.backlog -= + mlxsw_sp_qdisc->stats_base.backlog; + return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, mlxsw_sp_qdisc->tclass_num); } @@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc->stats_base.backlog); p->qstats->backlog -= backlog; + mlxsw_sp_qdisc->stats_base.backlog = 0; } static int @@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; stats_base = &mlxsw_sp_qdisc->stats_base; - tx_bytes = stats->tx_bytes - stats_base->tx_bytes; - tx_packets = stats->tx_packets - stats_base->tx_packets; + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &tx_packets, &tx_bytes); + tx_bytes = tx_bytes - stats_base->tx_bytes; + tx_packets = tx_packets - stats_base->tx_packets; + overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - stats_base->overlimits; drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] - @@ -406,11 +472,10 @@ int 
mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - if (p->parent != TC_H_ROOT) + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; - mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; - if (p->command == TC_RED_REPLACE) return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, mlxsw_sp_qdisc, @@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, { int i; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + &mlxsw_sp_port->tclass_qdiscs[i]); + mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; + } return 0; } @@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, void *params) { struct tc_prio_qopt_offload_params *p = params; - int tclass, i; + struct mlxsw_sp_qdisc *child_qdisc; + int tclass, i, band, backlog; + u8 old_priomap; int err; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]); - err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass); - if (err) - return err; + for (band = 0; band < p->bands; band++) { + tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + old_priomap = child_qdisc->prio_bitmap; + child_qdisc->prio_bitmap = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (p->priomap[i] == band) { + child_qdisc->prio_bitmap |= BIT(i); + if (BIT(i) & old_priomap) + continue; + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, + i, tclass); + if (err) + return err; + } + } + if (old_priomap != child_qdisc->prio_bitmap && + child_qdisc->ops && child_qdisc->ops->clean_stats) { + backlog = child_qdisc->stats_base.backlog; + child_qdisc->ops->clean_stats(mlxsw_sp_port, + child_qdisc); + child_qdisc->stats_base.backlog = backlog; + } + } + 
for (; band < IEEE_8021QAZ_MAX_TCS; band++) { + tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + child_qdisc->prio_bitmap = 0; + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); } - return 0; } @@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { drops += xstats->tail_drop[i]; + drops += xstats->wred_drop[i]; backlog += xstats->backlog[i]; } drops = drops - stats_base->drops; @@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, stats_base->tx_bytes = stats->tx_bytes; stats_base->drops = 0; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { stats_base->drops += xstats->tail_drop[i]; + stats_base->drops += xstats->wred_drop[i]; + } mlxsw_sp_qdisc->stats_base.backlog = 0; } @@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, }; +/* Grafting is not supported in mlxsw. It will result in un-offloading of the + * grafted qdisc as well as the qdisc in the qdisc new location. + * (However, if the graft is to the location where the qdisc is already at, it + * will be ignored completely and won't cause un-offloading). + */ +static int +mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_prio_qopt_offload_graft_params *p) +{ + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band); + struct mlxsw_sp_qdisc *old_qdisc; + + /* Check if the grafted qdisc is already in its "new" location. If so - + * nothing needs to be done. + */ + if (p->band < IEEE_8021QAZ_MAX_TCS && + mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle) + return 0; + + /* See if the grafted qdisc is already offloaded on any tclass. If so, + * unoffload it. 
+ */ + old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, + p->child_handle); + if (old_qdisc) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); + + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + &mlxsw_sp_port->tclass_qdiscs[tclass_num]); + return -EOPNOTSUPP; +} + int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - if (p->parent != TC_H_ROOT) + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; - mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; if (p->command == TC_PRIO_REPLACE) return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, mlxsw_sp_qdisc, @@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, case TC_PRIO_STATS: return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); + case TC_PRIO_GRAFT: + return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->graft_params); default: return -EOPNOTSUPP; } @@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { - mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc), - GFP_KERNEL); - if (!mlxsw_sp_port->root_qdisc) - return -ENOMEM; + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + int i; + mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL); + if (!mlxsw_sp_qdisc) + goto err_root_qdisc_init; + + mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc; + mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff; mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS, + GFP_KERNEL); + if (!mlxsw_sp_qdisc) + goto err_tclass_qdiscs_init; + + mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i; + return 0; + +err_tclass_qdiscs_init: + 
kfree(mlxsw_sp_port->root_qdisc); +err_root_qdisc_init: + return -ENOMEM; } void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) { + kfree(mlxsw_sp_port->tclass_qdiscs); kfree(mlxsw_sp_port->root_qdisc); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index ca4a81dc1ace..03ad4eeac7f8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type { /* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */ #define QED_IWARP_MAX_BDS_PER_FPDU 3 -char *pkt_type_str[] = { +static const char * const pkt_type_str[] = { "QED_IWARP_MPA_PKT_PACKED", "QED_IWARP_MPA_PKT_PARTIAL", "QED_IWARP_MPA_PKT_UNALIGNED" diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index cc51286ee51f..0a0638d692f9 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -735,10 +735,6 @@ struct ring_info { u8 __pad[sizeof(void *) - sizeof(u32)]; }; -enum features { - RTL_FEATURE_GMII = (1 << 0), -}; - struct rtl8169_counters { __le64 tx_packets; __le64 rx_packets; @@ -8233,7 +8229,7 @@ static const struct rtl_cfg_info { unsigned int region; unsigned int align; u16 event_slow; - unsigned features; + unsigned int has_gmii:1; const struct rtl_coalesce_info *coalesce_info; u8 default_ver; } rtl_cfg_infos [] = { @@ -8242,7 +8238,7 @@ static const struct rtl_cfg_info { .region = 1, .align = 0, .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver, - .features = RTL_FEATURE_GMII, + .has_gmii = 1, .coalesce_info = rtl_coalesce_info_8169, .default_ver = RTL_GIGA_MAC_VER_01, }, @@ -8251,7 +8247,7 @@ static const struct rtl_cfg_info { .region = 2, .align = 8, .event_slow = SYSErr | LinkChg | RxOverflow, - .features = RTL_FEATURE_GMII, + .has_gmii = 1, .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_11, }, @@ -8394,7 
+8390,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) mii->mdio_write = rtl_mdio_write; mii->phy_id_mask = 0x1f; mii->reg_num_mask = 0x1f; - mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII); + mii->supports_gmii = cfg->has_gmii; /* disable ASPM completely as that cause random device stop working * problems as well as full system hangs for some PCIe devices users */ diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 5166575a164d..a115f12bf130 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -74,6 +74,7 @@ struct ipvl_dev { DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); netdev_features_t sfeatures; u32 msg_enable; + spinlock_t addrs_lock; }; struct ipvl_addr { diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1b5dc200b573..17daebd19e65 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr) struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6) { - struct ipvl_addr *addr; + struct ipvl_addr *addr, *ret = NULL; - list_for_each_entry(addr, &ipvlan->addrs, anode) - if (addr_equal(is_v6, addr, iaddr)) - return addr; - return NULL; + rcu_read_lock(); + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) { + if (addr_equal(is_v6, addr, iaddr)) { + ret = addr; + break; + } + } + rcu_read_unlock(); + return ret; } bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) { struct ipvl_dev *ipvlan; + bool ret = false; - ASSERT_RTNL(); - - list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) - return true; + rcu_read_lock(); + list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { + if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) { + ret = true; + break; + } } - return false; + rcu_read_unlock(); + return ret; } static void 
*ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) @@ -498,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb) /* In this mode we dont care about multicast and broadcast traffic */ if (is_multicast_ether_addr(ethh->h_dest)) { - pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", - ntohs(skb->protocol)); + pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); kfree_skb(skb); goto out; } diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index d05b902c925b..3efc1c92c6a7 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev) else dev->flags &= ~IFF_NOARP; - list_for_each_entry(addr, &ipvlan->addrs, anode) + rcu_read_lock(); + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); + rcu_read_unlock(); return dev_uc_add(phy_dev, phy_dev->dev_addr); } @@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_del(phy_dev, phy_dev->dev_addr); - list_for_each_entry(addr, &ipvlan->addrs, anode) + rcu_read_lock(); + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); + rcu_read_unlock(); return 0; } @@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan->sfeatures = IPVLAN_FEATURES; ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); + spin_lock_init(&ipvlan->addrs_lock); /* TODO Probably put random address here to be presented to the * world but keep using the physical-dev address for the outgoing @@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_addr *addr, *next; + spin_lock_bh(&ipvlan->addrs_lock); list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { ipvlan_ht_addr_del(addr); - list_del(&addr->anode); + 
list_del_rcu(&addr->anode); kfree_rcu(addr, rcu); } + spin_unlock_bh(&ipvlan->addrs_lock); ida_simple_remove(&ipvlan->port->ida, dev->dev_id); list_del_rcu(&ipvlan->pnode); @@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused, if (dev->reg_state != NETREG_UNREGISTERING) break; - list_for_each_entry_safe(ipvlan, next, &port->ipvlans, - pnode) + list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode) ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, &lst_kill); unregister_netdevice_many(&lst_kill); @@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused, return NOTIFY_DONE; } +/* the caller must held the addrs lock */ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; @@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) addr->atype = IPVL_IPV6; #endif } - list_add_tail(&addr->anode, &ipvlan->addrs); + + list_add_tail_rcu(&addr->anode, &ipvlan->addrs); /* If the interface is not up, the address will be added to the hash * list by ipvlan_open. 
@@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; + spin_lock_bh(&ipvlan->addrs_lock); addr = ipvlan_find_addr(ipvlan, iaddr, is_v6); - if (!addr) + if (!addr) { + spin_unlock_bh(&ipvlan->addrs_lock); return; + } ipvlan_ht_addr_del(addr); - list_del(&addr->anode); + list_del_rcu(&addr->anode); + spin_unlock_bh(&ipvlan->addrs_lock); kfree_rcu(addr, rcu); - - return; } static bool ipvlan_is_valid_dev(const struct net_device *dev) @@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev) #if IS_ENABLED(CONFIG_IPV6) static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) { - if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) { + int ret = -EINVAL; + + spin_lock_bh(&ipvlan->addrs_lock); + if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv6=%pI6c addr for %s intf\n", ip6_addr, ipvlan->dev->name); - return -EINVAL; - } - - return ipvlan_add_addr(ipvlan, ip6_addr, true); + else + ret = ipvlan_add_addr(ipvlan, ip6_addr, true); + spin_unlock_bh(&ipvlan->addrs_lock); + return ret; } static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) @@ -899,10 +912,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused, struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); - /* FIXME IPv6 autoconf calls us from bh without RTNL */ - if (in_softirq()) - return NOTIFY_DONE; - if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; @@ -922,14 +931,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused, static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { - if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) { + int ret = -EINVAL; + + spin_lock_bh(&ipvlan->addrs_lock); + if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed 
to add IPv4=%pI4 on %s intf.\n", ip4_addr, ipvlan->dev->name); - return -EINVAL; - } - - return ipvlan_add_addr(ipvlan, ip4_addr, false); + else + ret = ipvlan_add_addr(ipvlan, ip4_addr, false); + spin_unlock_bh(&ipvlan->addrs_lock); + return ret; } static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6ac8b29b2dc3..27327c917a59 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1584,25 +1584,14 @@ static int phylink_sfp_module_insert(void *upstream, bool changed; u8 port; - sfp_parse_support(pl->sfp_bus, id, support); - port = sfp_parse_port(pl->sfp_bus, id, support); - iface = sfp_parse_interface(pl->sfp_bus, id); - ASSERT_RTNL(); - switch (iface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - case PHY_INTERFACE_MODE_10GKR: - break; - default: - return -EINVAL; - } + sfp_parse_support(pl->sfp_bus, id, support); + port = sfp_parse_port(pl->sfp_bus, id, support); memset(&config, 0, sizeof(config)); linkmode_copy(config.advertising, support); - config.interface = iface; + config.interface = PHY_INTERFACE_MODE_NA; config.speed = SPEED_UNKNOWN; config.duplex = DUPLEX_UNKNOWN; config.pause = MLO_PAUSE_AN; @@ -1611,6 +1600,22 @@ static int phylink_sfp_module_insert(void *upstream, /* Ignore errors if we're expecting a PHY to attach later */ ret = phylink_validate(pl, support, &config); if (ret) { + netdev_err(pl->netdev, "validation with support %*pb failed: %d\n", + __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret); + return ret; + } + + iface = sfp_select_interface(pl->sfp_bus, id, config.advertising); + if (iface == PHY_INTERFACE_MODE_NA) { + netdev_err(pl->netdev, + "selection of interface failed, advertisment %*pb\n", + __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising); + return -EINVAL; + } + + config.interface = iface; + ret = phylink_validate(pl, support, &config); + if (ret) { 
netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n", phylink_an_mode_str(MLO_AN_INBAND), phy_modes(config.interface), diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 8961209ee949..3d4ff5d0d2a6 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -106,68 +106,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, EXPORT_SYMBOL_GPL(sfp_parse_port); /** - * sfp_parse_interface() - Parse the phy_interface_t - * @bus: a pointer to the &struct sfp_bus structure for the sfp module - * @id: a pointer to the module's &struct sfp_eeprom_id - * - * Derive the phy_interface_t mode for the information found in the - * module's identifying EEPROM. There is no standard or defined way - * to derive this information, so we use some heuristics. - * - * If the encoding is 64b66b, then the module must be >= 10G, so - * return %PHY_INTERFACE_MODE_10GKR. - * - * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre - * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return - * %PHY_INTERFACE_MODE_SGMII mode. - * - * If the encoding is not known, return %PHY_INTERFACE_MODE_NA. - */ -phy_interface_t sfp_parse_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id) -{ - phy_interface_t iface; - - /* Setting the serdes link mode is guesswork: there's no field in - * the EEPROM which indicates what mode should be used. - * - * If the module wants 64b66b, then it must be >= 10G. - * - * If it's a gigabit-only fiber module, it probably does not have - * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch - * to SGMII mode (which is required to support non-gigabit speeds). 
- */ - switch (id->base.encoding) { - case SFP_ENCODING_8472_64B66B: - iface = PHY_INTERFACE_MODE_10GKR; - break; - - case SFP_ENCODING_8B10B: - if (!id->base.e1000_base_t && - !id->base.e100_base_lx && - !id->base.e100_base_fx) - iface = PHY_INTERFACE_MODE_1000BASEX; - else - iface = PHY_INTERFACE_MODE_SGMII; - break; - - default: - if (id->base.e1000_base_cx) { - iface = PHY_INTERFACE_MODE_1000BASEX; - break; - } - - iface = PHY_INTERFACE_MODE_NA; - dev_err(bus->sfp_dev, - "SFP module encoding does not support 8b10b nor 64b66b\n"); - break; - } - - return iface; -} -EXPORT_SYMBOL_GPL(sfp_parse_interface); - -/** * sfp_parse_support() - Parse the eeprom id for supported link modes * @bus: a pointer to the &struct sfp_bus structure for the sfp module * @id: a pointer to the module's &struct sfp_eeprom_id @@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) { unsigned int br_min, br_nom, br_max; - - phylink_set(support, Autoneg); - phylink_set(support, Pause); - phylink_set(support, Asym_Pause); + __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, }; /* Decode the bitrate information to MBd */ br_min = br_nom = br_max = 0; @@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, /* Set ethtool support from the compliance fields. 
*/ if (id->base.e10g_base_sr) - phylink_set(support, 10000baseSR_Full); + phylink_set(modes, 10000baseSR_Full); if (id->base.e10g_base_lr) - phylink_set(support, 10000baseLR_Full); + phylink_set(modes, 10000baseLR_Full); if (id->base.e10g_base_lrm) - phylink_set(support, 10000baseLRM_Full); + phylink_set(modes, 10000baseLRM_Full); if (id->base.e10g_base_er) - phylink_set(support, 10000baseER_Full); + phylink_set(modes, 10000baseER_Full); if (id->base.e1000_base_sx || id->base.e1000_base_lx || id->base.e1000_base_cx) - phylink_set(support, 1000baseX_Full); + phylink_set(modes, 1000baseX_Full); if (id->base.e1000_base_t) { - phylink_set(support, 1000baseT_Half); - phylink_set(support, 1000baseT_Full); + phylink_set(modes, 1000baseT_Half); + phylink_set(modes, 1000baseT_Full); } /* 1000Base-PX or 1000Base-BX10 */ @@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) { /* This may look odd, but some manufacturers use 12000MBd */ if (br_min <= 12000 && br_max >= 10300) - phylink_set(support, 10000baseCR_Full); + phylink_set(modes, 10000baseCR_Full); if (br_min <= 3200 && br_max >= 3100) - phylink_set(support, 2500baseX_Full); + phylink_set(modes, 2500baseX_Full); if (br_min <= 1300 && br_max >= 1200) - phylink_set(support, 1000baseX_Full); + phylink_set(modes, 1000baseX_Full); } if (id->base.sfp_ct_passive) { if (id->base.passive.sff8431_app_e) - phylink_set(support, 10000baseCR_Full); + phylink_set(modes, 10000baseCR_Full); } if (id->base.sfp_ct_active) { if (id->base.active.sff8431_app_e || id->base.active.sff8431_lim) { - phylink_set(support, 10000baseCR_Full); + phylink_set(modes, 10000baseCR_Full); } } @@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, case 0x00: /* Unspecified */ break; case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */ - phylink_set(support, 100000baseSR4_Full); - phylink_set(support, 
25000baseSR_Full); + phylink_set(modes, 100000baseSR4_Full); + phylink_set(modes, 25000baseSR_Full); break; case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */ case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */ - phylink_set(support, 100000baseLR4_ER4_Full); + phylink_set(modes, 100000baseLR4_ER4_Full); break; case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */ case 0x0c: /* 25Gbase-CR CA-S */ case 0x0d: /* 25Gbase-CR CA-N */ - phylink_set(support, 100000baseCR4_Full); - phylink_set(support, 25000baseCR_Full); + phylink_set(modes, 100000baseCR4_Full); + phylink_set(modes, 25000baseCR_Full); break; default: dev_warn(bus->sfp_dev, @@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, id->base.fc_speed_200 || id->base.fc_speed_400) { if (id->base.br_nominal >= 31) - phylink_set(support, 2500baseX_Full); + phylink_set(modes, 2500baseX_Full); if (id->base.br_nominal >= 12) - phylink_set(support, 1000baseX_Full); + phylink_set(modes, 1000baseX_Full); } + + /* If we haven't discovered any modes that this module supports, try + * the encoding and bitrate to determine supported modes. Some BiDi + * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to + * the differing wavelengths, so do not set any transceiver bits. 
+ */ + if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) { + /* If the encoding and bit rate allows 1000baseX */ + if (id->base.encoding == SFP_ENCODING_8B10B && br_nom && + br_min <= 1300 && br_max >= 1200) + phylink_set(modes, 1000baseX_Full); + } + + bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS); + + phylink_set(support, Autoneg); + phylink_set(support, Pause); + phylink_set(support, Asym_Pause); } EXPORT_SYMBOL_GPL(sfp_parse_support); +/** + * sfp_select_interface() - Select appropriate phy_interface_t mode + * @bus: a pointer to the &struct sfp_bus structure for the sfp module + * @id: a pointer to the module's &struct sfp_eeprom_id + * @link_modes: ethtool link modes mask + * + * Derive the phy_interface_t mode for the information found in the + * module's identifying EEPROM and the link modes mask. There is no + * standard or defined way to derive this information, so we decide + * based upon the link mode mask. + */ +phy_interface_t sfp_select_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *link_modes) +{ + if (phylink_test(link_modes, 10000baseCR_Full) || + phylink_test(link_modes, 10000baseSR_Full) || + phylink_test(link_modes, 10000baseLR_Full) || + phylink_test(link_modes, 10000baseLRM_Full) || + phylink_test(link_modes, 10000baseER_Full)) + return PHY_INTERFACE_MODE_10GKR; + + if (phylink_test(link_modes, 2500baseX_Full)) + return PHY_INTERFACE_MODE_2500BASEX; + + if (id->base.e1000_base_t || + id->base.e100_base_lx || + id->base.e100_base_fx) + return PHY_INTERFACE_MODE_SGMII; + + if (phylink_test(link_modes, 1000baseX_Full)) + return PHY_INTERFACE_MODE_1000BASEX; + + dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n"); + + return PHY_INTERFACE_MODE_NA; +} +EXPORT_SYMBOL_GPL(sfp_select_interface); + static LIST_HEAD(sfp_buses); static DEFINE_MUTEX(sfp_mutex); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 6c7d9289078d..83bf4959b043 100644 --- a/drivers/net/phy/sfp.c 
+++ b/drivers/net/phy/sfp.c @@ -42,6 +42,7 @@ enum { SFP_MOD_EMPTY = 0, SFP_MOD_PROBE, + SFP_MOD_HPOWER, SFP_MOD_PRESENT, SFP_MOD_ERROR, @@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = { * access the I2C EEPROM. However, Avago modules require 300ms. */ #define T_PROBE_INIT msecs_to_jiffies(300) +#define T_HPOWER_LEVEL msecs_to_jiffies(300) #define T_PROBE_RETRY msecs_to_jiffies(100) /* SFP modules appear to always have their PHY configured for bus address @@ -110,10 +112,12 @@ struct sfp { struct sfp_bus *sfp_bus; struct phy_device *mod_phy; const struct sff_data *type; + u32 max_power_mW; unsigned int (*get_state)(struct sfp *); void (*set_state)(struct sfp *, unsigned int); int (*read)(struct sfp *, bool, u8, void *, size_t); + int (*write)(struct sfp *, bool, u8, void *, size_t); struct gpio_desc *gpio[GPIO_MAX]; @@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state) } } -static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr, - void *buf, size_t len) +static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, + size_t len) { struct i2c_msg msgs[2]; + u8 bus_addr = a2 ? 0x51 : 0x50; int ret; msgs[0].addr = bus_addr; @@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr, msgs[1].len = len; msgs[1].buf = buf; - ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs)); + ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs)); if (ret < 0) return ret; return ret == ARRAY_SIZE(msgs) ? len : 0; } -static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf, - size_t len) +static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, + size_t len) { - return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len); + struct i2c_msg msgs[1]; + u8 bus_addr = a2 ? 
0x51 : 0x50; + int ret; + + msgs[0].addr = bus_addr; + msgs[0].flags = 0; + msgs[0].len = 1 + len; + msgs[0].buf = kmalloc(1 + len, GFP_KERNEL); + if (!msgs[0].buf) + return -ENOMEM; + + msgs[0].buf[0] = dev_addr; + memcpy(&msgs[0].buf[1], buf, len); + + ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs)); + + kfree(msgs[0].buf); + + if (ret < 0) + return ret; + + return ret == ARRAY_SIZE(msgs) ? len : 0; } static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) @@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) sfp->i2c = i2c; sfp->read = sfp_i2c_read; + sfp->write = sfp_i2c_write; i2c_mii = mdio_i2c_alloc(sfp->dev, i2c); if (IS_ERR(i2c_mii)) @@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len) return sfp->read(sfp, a2, addr, buf, len); } +static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len) +{ + return sfp->write(sfp, a2, addr, buf, len); +} + static unsigned int sfp_check(void *buf, size_t len) { u8 *p, check; @@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp) sfp_sm_probe_phy(sfp); } +static int sfp_sm_mod_hpower(struct sfp *sfp) +{ + u32 power; + u8 val; + int err; + + power = 1000; + if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL)) + power = 1500; + if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL)) + power = 2000; + + if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE && + (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) != + SFP_DIAGMON_DDM) { + /* The module appears not to implement bus address 0xa2, + * or requires an address change sequence, so assume that + * the module powers up in the indicated power mode. 
+ */ + if (power > sfp->max_power_mW) { + dev_err(sfp->dev, + "Host does not support %u.%uW modules\n", + power / 1000, (power / 100) % 10); + return -EINVAL; + } + return 0; + } + + if (power > sfp->max_power_mW) { + dev_warn(sfp->dev, + "Host does not support %u.%uW modules, module left in power mode 1\n", + power / 1000, (power / 100) % 10); + return 0; + } + + if (power <= 1000) + return 0; + + err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); + if (err != sizeof(val)) { + dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err); + err = -EAGAIN; + goto err; + } + + val |= BIT(0); + + err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); + if (err != sizeof(val)) { + dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err); + err = -EAGAIN; + goto err; + } + + dev_info(sfp->dev, "Module switched to %u.%uW power level\n", + power / 1000, (power / 100) % 10); + return T_HPOWER_LEVEL; + +err: + return err; +} + static int sfp_sm_mod_probe(struct sfp *sfp) { /* SFP module inserted - read I2C data */ struct sfp_eeprom_id id; u8 check; - int err; + int ret; - err = sfp_read(sfp, false, 0, &id, sizeof(id)); - if (err < 0) { - dev_err(sfp->dev, "failed to read EEPROM: %d\n", err); + ret = sfp_read(sfp, false, 0, &id, sizeof(id)); + if (ret < 0) { + dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret); return -EAGAIN; } - if (err != sizeof(id)) { - dev_err(sfp->dev, "EEPROM short read: %d\n", err); + if (ret != sizeof(id)) { + dev_err(sfp->dev, "EEPROM short read: %d\n", ret); return -EAGAIN; } @@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp) dev_warn(sfp->dev, "module address swap to access page 0xA2 is not supported.\n"); - return sfp_module_insert(sfp->sfp_bus, &sfp->id); + ret = sfp_module_insert(sfp->sfp_bus, &sfp->id); + if (ret < 0) + return ret; + + return sfp_sm_mod_hpower(sfp); } static void sfp_sm_mod_remove(struct sfp *sfp) @@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) if (event == 
SFP_E_REMOVE) { sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0); } else if (event == SFP_E_TIMEOUT) { - int err = sfp_sm_mod_probe(sfp); + int val = sfp_sm_mod_probe(sfp); - if (err == 0) + if (val == 0) sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0); - else if (err == -EAGAIN) - sfp_sm_set_timer(sfp, T_PROBE_RETRY); - else + else if (val > 0) + sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val); + else if (val != -EAGAIN) sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0); + else + sfp_sm_set_timer(sfp, T_PROBE_RETRY); } break; + case SFP_MOD_HPOWER: + if (event == SFP_E_TIMEOUT) { + sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0); + break; + } + /* fallthrough */ case SFP_MOD_PRESENT: case SFP_MOD_ERROR: if (event == SFP_E_REMOVE) { @@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev) if (!(sfp->gpio[GPIO_MODDEF0])) sfp->get_state = sff_gpio_get_state; + device_property_read_u32(&pdev->dev, "maximum-power-milliwatt", + &sfp->max_power_mW); + if (!sfp->max_power_mW) + sfp->max_power_mW = 1000; + + dev_info(sfp->dev, "Host maximum power %u.%uW\n", + sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10); + sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); if (!sfp->sfp_bus) return -ENOMEM; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a468439969df..5dd781e65958 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port) } #endif -static int team_upper_dev_link(struct team *team, struct team_port *port) +static int team_upper_dev_link(struct team *team, struct team_port *port, + struct netlink_ext_ack *extack) { struct netdev_lag_upper_info lag_upper_info; int err; lag_upper_info.tx_type = team->mode->lag_tx_type; err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, - &lag_upper_info, NULL); + &lag_upper_info, extack); if (err) return err; port->dev->priv_flags |= IFF_TEAM_PORT; @@ -1129,7 +1130,8 @@ static void 
__team_port_change_port_added(struct team_port *port, bool linkup); static int team_dev_type_check_change(struct net_device *dev, struct net_device *port_dev); -static int team_port_add(struct team *team, struct net_device *port_dev) +static int team_port_add(struct team *team, struct net_device *port_dev, + struct netlink_ext_ack *extack) { struct net_device *dev = team->dev; struct team_port *port; @@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev) int err; if (port_dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port"); netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n", portname); return -EINVAL; } if (team_port_exists(port_dev)) { + NL_SET_ERR_MSG(extack, "Device is already a port of a team device"); netdev_err(dev, "Device %s is already a port " "of a team device\n", portname); return -EBUSY; @@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n", portname); return -EPERM; @@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) return err; if (port_dev->flags & IFF_UP) { + NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port"); netdev_err(dev, "Device %s is up. 
Set it down before adding it as a team port\n", portname); return -EBUSY; @@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_handler_register; } - err = team_upper_dev_link(team, port); + err = team_upper_dev_link(team, port, extack); if (err) { netdev_err(dev, "Device %s failed to set upper link\n", portname); @@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev, int err; mutex_lock(&team->lock); - err = team_port_add(team, port_dev); + err = team_port_add(team, port_dev, extack); mutex_unlock(&team->lock); if (!err) diff --git a/include/linux/sfp.h b/include/linux/sfp.h index e724d5a3dd80..ebce9e24906a 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -422,10 +422,11 @@ struct sfp_upstream_ops { #if IS_ENABLED(CONFIG_SFP) int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); -phy_interface_t sfp_parse_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); +phy_interface_t sfp_select_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *link_modes); int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, @@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus, return PORT_OTHER; } -static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id) -{ - return PHY_INTERFACE_MODE_NA; -} - static inline void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) { } +static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *link_modes) +{ + return PHY_INTERFACE_MODE_NA; +} + static inline int sfp_get_module_info(struct 
sfp_bus *bus, struct ethtool_modinfo *modinfo) { diff --git a/include/net/ethoc.h b/include/net/ethoc.h index bb7f467da7fc..29ba069a1d93 100644 --- a/include/net/ethoc.h +++ b/include/net/ethoc.h @@ -21,4 +21,3 @@ struct ethoc_platform_data { }; #endif /* !LINUX_NET_ETHOC_H */ - diff --git a/include/net/flow.h b/include/net/flow.h index f1624fd5b1d0..64e7ee9cb980 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos, fl4->daddr = daddr; fl4->saddr = saddr; } - + struct flowi6 { struct flowi_common __fl_common; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c1a93ce35e62..b68fea022a82 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops { u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; - int (*setsockopt)(struct sock *sk, int level, int optname, + int (*setsockopt)(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); - int (*getsockopt)(struct sock *sk, int level, int optname, + int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, @@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops { /** inet_connection_sock - INET connection oriented sock * - * @icsk_accept_queue: FIFO of established children + * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) @@ -122,7 +122,7 @@ struct inet_connection_sock { unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ - __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } 
icsk_ack; struct { int enabled; @@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[]; static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { struct inet_connection_sock *icsk = inet_csk(sk); - + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { icsk->icsk_pending = 0; #ifdef INET_CSK_CLEAR_TIMERS diff --git a/include/net/ip.h b/include/net/ip.h index 746abff9ce51..fe63ba95d12b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); struct ip_reply_arg { - struct kvec iov[1]; + struct kvec iov[1]; int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ - /* -1 if not needed */ + /* -1 if not needed */ int bound_dev_if; u8 tos; kuid_t uid; -}; +}; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net); /* * Functions provided by ip_forward.c */ - + int ip_forward(struct sk_buff *skb); - + /* * Functions provided by ip_options.c */ - + void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f80524396c06..15e19c5c6f26 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -157,7 +157,7 @@ struct fib_result_nl { unsigned char nh_sel; unsigned char type; unsigned char scope; - int err; + int err; }; #ifdef CONFIG_IP_ROUTE_MULTIPATH diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 7a98cd583c73..cabd3cdd4015 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -105,8 +105,8 @@ #define IPV6_ADDR_ANY 0x0000U -#define IPV6_ADDR_UNICAST 0x0001U -#define IPV6_ADDR_MULTICAST 0x0002U +#define IPV6_ADDR_UNICAST 0x0001U +#define IPV6_ADDR_MULTICAST 0x0002U #define IPV6_ADDR_LOOPBACK 0x0010U #define IPV6_ADDR_LINKLOCAL 0x0020U @@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr 
*a1, const struct in6_addr *m, #endif } -static inline void ipv6_addr_prefix(struct in6_addr *pfx, +static inline void ipv6_addr_prefix(struct in6_addr *pfx, const struct in6_addr *addr, int plen) { @@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr, addr[1] = wl; } -static inline void ipv6_addr_set(struct in6_addr *addr, +static inline void ipv6_addr_set(struct in6_addr *addr, __be32 w1, __be32 w2, __be32 w3, __be32 w4) { @@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int } /* - * we should *never* get to this point since that + * we should *never* get to this point since that * would mean the addrs are equal * * However, we do get to it 8) And exacly, when diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 87406252f0a3..e828d31be5da 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -806,6 +806,7 @@ enum tc_prio_command { TC_PRIO_REPLACE, TC_PRIO_DESTROY, TC_PRIO_STATS, + TC_PRIO_GRAFT, }; struct tc_prio_qopt_offload_params { @@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params { struct gnet_stats_queue *qstats; }; +struct tc_prio_qopt_offload_graft_params { + u8 band; + u32 child_handle; +}; + struct tc_prio_qopt_offload { enum tc_prio_command command; u32 handle; @@ -825,6 +831,8 @@ struct tc_prio_qopt_offload { union { struct tc_prio_qopt_offload_params replace_params; struct tc_qopt_offload_stats stats; + struct tc_prio_qopt_offload_graft_params graft_params; }; }; + #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7d2077665c0b..aa027ba1d032 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk) static inline void xfrm_sk_free_policy(struct sock *sk) {} static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; } -static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; } -static inline int 
xfrm4_route_forward(struct sk_buff *skb) { return 1; } +static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; } +static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb) -{ - return 1; -} +{ + return 1; +} static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb) { return 1; @@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x, { if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) && (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) || - ipv6_addr_any((struct in6_addr *)saddr) || + ipv6_addr_any((struct in6_addr *)saddr) || ipv6_addr_any((struct in6_addr *)&x->props.saddr))) return 1; return 0; @@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname, static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { return -ENOPROTOOPT; -} +} static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index cd46d7666598..f31e6575ab91 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh) fnhe = rcu_dereference_protected(hash[i].chain, 1); while (fnhe) { struct fib_nh_exception *next; - + next = rcu_dereference_protected(fnhe->fnhe_next, 1); rt_fibinfo_free(&fnhe->fnhe_rth_input); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index fdabc70283b6..d97e83b2dd33 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -556,4 +556,3 @@ int __init ip_misc_proc_init(void) { return register_pernet_subsys(&ip_proc_ops); } - diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index ec35eaa5c029..c0630013c1ae 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister); for (handler = 
rcu_dereference(head); \ handler != NULL; \ handler = rcu_dereference(handler->next)) \ - + static int tunnel4_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 796ac4115485..0c752dc3f93b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -379,4 +379,3 @@ void __init xfrm4_init(void) xfrm4_protocol_init(); register_pernet_subsys(&xfrm4_net_ops); } - diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 8e085cc05aeb..d7d0abc7fd0e 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -552,4 +552,3 @@ void ac6_proc_exit(struct net *net) remove_proc_entry("anycast6", net->proc_net); } #endif - diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 11025f8d124b..b643f5ce6c80 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return nexthdr; } EXPORT_SYMBOL(ipv6_find_hdr); - diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 24535169663d..4d780c7f0130 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif - diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index b8858c546f41..1678cf037688 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -355,4 +355,3 @@ void ipv6_misc_proc_exit(void) { unregister_pernet_subsys(&ipv6_proc_ops); } - diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b15075a5c227..16f434791763 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -196,4 +196,3 @@ void xfrm6_state_fini(void) { xfrm_state_unregister_afinfo(&xfrm6_state_afinfo); } - diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 27e672c12492..68f9d942bed4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -739,6 +739,7 @@ static u32 
qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, unsigned int len) { + bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; @@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, * If child was empty even before update then backlog * counter is screwed and we skip notification because * parent class is already passive. + * + * If the original child was offloaded then it is allowed + * to be seen as empty, so the parent is notified anyway. */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n); + notify = !sch->q.qlen && !WARN_ON_ONCE(!n && + !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index efbf51f35778..222e53d3d27a 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch) sch->q.qlen = 0; } -static int prio_offload(struct Qdisc *sch, bool enable) +static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) { - struct prio_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_prio_qopt_offload opt = { .handle = sch->handle, @@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable) if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; - if (enable) { + if (qopt) { opt.command = TC_PRIO_REPLACE; - opt.replace_params.bands = q->bands; - memcpy(&opt.replace_params.priomap, q->prio2band, + opt.replace_params.bands = qopt->bands; + memcpy(&opt.replace_params.priomap, qopt->priomap, TC_PRIO_MAX + 1); opt.replace_params.qstats = &sch->qstats; } else { @@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch) struct prio_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - prio_offload(sch, false); + prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } } + prio_offload(sch, qopt); sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); @@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } sch_tree_unlock(sch); - prio_offload(sch, true); return 0; } @@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); + struct tc_prio_qopt_offload graft_offload; + struct net_device *dev = qdisc_dev(sch); unsigned long band = arg - 1; + bool any_qdisc_is_offloaded; + int err; if (new == NULL) new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->queues[band]); + + if (!tc_can_offload(dev)) + return 0; + + graft_offload.handle = sch->handle; + graft_offload.parent = sch->parent; + graft_offload.graft_params.band = band; + graft_offload.graft_params.child_handle = new->handle; + graft_offload.command = TC_PRIO_GRAFT; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, + &graft_offload); + + /* Don't report error if the graft is part of destroy operation. */ + if (err && new != &noop_qdisc) { + /* Don't report error if the parent, the old child and the new + * one are not offloaded. 
+ */ + any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; + any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED; + if (*old) + any_qdisc_is_offloaded |= (*old)->flags & + TCQ_F_OFFLOADED; + + if (any_qdisc_is_offloaded) + NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); + } + return 0; } diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore new file mode 100644 index 000000000000..a793eef5b876 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/.gitignore @@ -0,0 +1 @@ +forwarding.config diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README new file mode 100644 index 000000000000..4a0964c42860 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/README @@ -0,0 +1,56 @@ +Motivation +========== + +One of the nice things about network namespaces is that they allow one +to easily create and test complex environments. + +Unfortunately, these namespaces can not be used with actual switching +ASICs, as their ports can not be migrated to other network namespaces +(NETIF_F_NETNS_LOCAL) and most of them probably do not support the +L1-separation provided by namespaces. + +However, a similar kind of flexibility can be achieved by using VRFs and +by looping the switch ports together. For example: + + br0 + + + vrf-h1 | vrf-h2 + + +---+----+ + + | | | | + 192.0.2.1/24 + + + + 192.0.2.2/24 + swp1 swp2 swp3 swp4 + + + + + + | | | | + +--------+ +--------+ + +The VRFs act as lightweight namespaces representing hosts connected to +the switch. + +This approach for testing switch ASICs has several advantages over the +traditional method that requires multiple physical machines, to name a +few: + +1. Only the device under test (DUT) is being tested without noise from +other systems. + +2. Ability to easily provision complex topologies.
Testing bridging +between 4-port LAGs or 8-way ECMP requires many physical links that are +not always available. With the VRF-based approach one merely needs to +loop back more ports. + +These tests are written with switch ASICs in mind, but they can be run +on any Linux box using veth pairs to emulate physical loopbacks. + +Guidelines for Writing Tests +============================ + +o Where possible, reuse an existing topology for different tests instead + of recreating the same topology. +o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and + RFC 5737, respectively. +o Where possible, tests shall be written so that they can be reused by + multiple topologies and added to lib.sh. +o Checks shall be added to lib.sh for any external dependencies. +o Code shall be checked using ShellCheck [1] prior to submission. + +1. https://www.shellcheck.net/ diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh new file mode 100755 index 000000000000..651998e70557 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + # 10 Seconds ageing time.
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \ + mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 192.0.2.2 +ping6_test $h1 2001:db8:1::2 +learning_test "br0" $swp1 $h1 $h2 +flood_test $swp2 $h1 $h2 + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config new file mode 100644 index 000000000000..5cd2aed97958 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/config @@ -0,0 +1,12 @@ +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_VRF=m +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_ACT_GACT=m +CONFIG_VETH=m diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample new file mode 100644 index 000000000000..ab235c124f20 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Topology description. p1 looped back to p2, p3 to p4 and so on. 
+declare -A NETIFS + +NETIFS[p1]=veth0 +NETIFS[p2]=veth1 +NETIFS[p3]=veth2 +NETIFS[p4]=veth3 +NETIFS[p5]=veth4 +NETIFS[p6]=veth5 +NETIFS[p7]=veth6 +NETIFS[p8]=veth7 + +############################################################################## +# Defines + +# IPv4 ping utility name +PING=ping +# IPv6 ping utility name. Some distributions use 'ping' for IPv6. +PING6=ping6 +# Packet generator. Some distributions use 'mz'. +MZ=mausezahn +# Time to wait after interfaces participating in the test are all UP +WAIT_TIME=5 +# Whether to pause on failure or not. +PAUSE_ON_FAIL=no +# Whether to pause on cleanup or not. +PAUSE_ON_CLEANUP=no diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh new file mode 100644 index 000000000000..23866a685f77 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -0,0 +1,533 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +# Can be overridden by the configuration file. +PING=${PING:=ping} +PING6=${PING6:=ping6} +MZ=${MZ:=mausezahn} +WAIT_TIME=${WAIT_TIME:=5} +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} + +if [[ -f forwarding.config ]]; then + source forwarding.config +fi + +############################################################################## +# Sanity checks + +if [[ "$(id -u)" -ne 0 ]]; then + echo "SKIP: need root privileges" + exit 0 +fi + +tc -j &> /dev/null +if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old, missing JSON support" + exit 0 +fi + +tc filter help 2>&1 | grep block &> /dev/null +if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old, missing shared block support" + exit 0 +fi + +if [[ ! -x "$(command -v jq)" ]]; then + echo "SKIP: jq not installed" + exit 0 +fi + +if [[ ! -x "$(command -v $MZ)" ]]; then + echo "SKIP: $MZ not installed" + exit 0 +fi + +if [[ ! 
-v NUM_NETIFS ]]; then + echo "SKIP: importer does not define \"NUM_NETIFS\"" + exit 0 +fi + +############################################################################## +# Command line options handling + +count=0 + +while [[ $# -gt 0 ]]; do + if [[ "$count" -eq "0" ]]; then + unset NETIFS + declare -A NETIFS + fi + count=$((count + 1)) + NETIFS[p$count]="$1" + shift +done + +############################################################################## +# Network interfaces configuration + +for i in $(eval echo {1..$NUM_NETIFS}); do + ip link show dev ${NETIFS[p$i]} &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: could not find all required interfaces" + exit 0 + fi +done + +############################################################################## +# Helpers + +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. +RET=0 + +check_err() +{ + local err=$1 + local msg=$2 + + if [[ $RET -eq 0 && $err -ne 0 ]]; then + RET=$err + retmsg=$msg + fi +} + +check_fail() +{ + local err=$1 + local msg=$2 + + if [[ $RET -eq 0 && $err -eq 0 ]]; then + RET=1 + retmsg=$msg + fi +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if [[ $RET -ne 0 ]]; then + EXIT_STATUS=1 + printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str" + if [[ ! -z "$retmsg" ]]; then + printf "\t%s\n" "$retmsg" + fi + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo "Hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + return 1 + fi + + printf "TEST: %-60s [PASS]\n" "$test_name $opt_str" + return 0 +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} + +setup_wait() +{ + for i in $(eval echo {1..$NUM_NETIFS}); do + while true; do + ip link show dev ${NETIFS[p$i]} up \ + | grep 'state UP' &> /dev/null + if [[ $? -ne 0 ]]; then + sleep 1 + else + break + fi + done + done + + # Make sure links are ready. 
+ sleep $WAIT_TIME +} + +pre_cleanup() +{ + if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then + echo "Pausing before cleanup, hit any key to continue" + read + fi +} + +vrf_prepare() +{ + ip -4 rule add pref 32765 table local + ip -4 rule del pref 0 + ip -6 rule add pref 32765 table local + ip -6 rule del pref 0 +} + +vrf_cleanup() +{ + ip -6 rule add pref 0 table local + ip -6 rule del pref 32765 + ip -4 rule add pref 0 table local + ip -4 rule del pref 32765 +} + +__last_tb_id=0 +declare -A __TB_IDS + +__vrf_td_id_assign() +{ + local vrf_name=$1 + + __last_tb_id=$((__last_tb_id + 1)) + __TB_IDS[$vrf_name]=$__last_tb_id + return $__last_tb_id +} + +__vrf_td_id_lookup() +{ + local vrf_name=$1 + + return ${__TB_IDS[$vrf_name]} +} + +vrf_create() +{ + local vrf_name=$1 + local tb_id + + __vrf_td_id_assign $vrf_name + tb_id=$? + + ip link add dev $vrf_name type vrf table $tb_id + ip -4 route add table $tb_id unreachable default metric 4278198272 + ip -6 route add table $tb_id unreachable default metric 4278198272 +} + +vrf_destroy() +{ + local vrf_name=$1 + local tb_id + + __vrf_td_id_lookup $vrf_name + tb_id=$? 
+ + ip -6 route del table $tb_id unreachable default metric 4278198272 + ip -4 route del table $tb_id unreachable default metric 4278198272 + ip link del dev $vrf_name +} + +__addr_add_del() +{ + local if_name=$1 + local add_del=$2 + local array + + shift + shift + array=("${@}") + + for addrstr in "${array[@]}"; do + ip address $add_del $addrstr dev $if_name + done +} + +simple_if_init() +{ + local if_name=$1 + local vrf_name + local array + + shift + vrf_name=v$if_name + array=("${@}") + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + __addr_add_del $if_name add "${array[@]}" +} + +simple_if_fini() +{ + local if_name=$1 + local vrf_name + local array + + shift + vrf_name=v$if_name + array=("${@}") + + __addr_add_del $if_name del "${array[@]}" + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +master_name_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["master"]' +} + +link_stats_tx_packets_get() +{ + local if_name=$1 + + ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' +} + +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + +bridge_ageing_time_get() +{ + local bridge=$1 + local ageing_time + + # Need to divide by 100 to convert to seconds. 
+ ageing_time=$(ip -j -d link show dev $bridge \ + | jq '.[]["linkinfo"]["info_data"]["ageing_time"]') + echo $((ageing_time / 100)) +} + +forwarding_enable() +{ + ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding) + ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding) + + sysctl -q -w net.ipv4.conf.all.forwarding=1 + sysctl -q -w net.ipv6.conf.all.forwarding=1 +} + +forwarding_restore() +{ + sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd + sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd +} + +tc_offload_check() +{ + for i in $(eval echo {1..$NUM_NETIFS}); do + ethtool -k ${NETIFS[p$i]} \ + | grep "hw-tc-offload: on" &> /dev/null + if [[ $? -ne 0 ]]; then + return 1 + fi + done + + return 0 +} + +############################################################################## +# Tests + +ping_test() +{ + local if_name=$1 + local dip=$2 + local vrf_name + + RET=0 + + vrf_name=$(master_name_get $if_name) + ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null + check_err $? + log_test "ping" +} + +ping6_test() +{ + local if_name=$1 + local dip=$2 + local vrf_name + + RET=0 + + vrf_name=$(master_name_get $if_name) + ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null + check_err $? + log_test "ping6" +} + +learning_test() +{ + local bridge=$1 + local br_port1=$2 # Connected to `host1_if`. + local host1_if=$3 + local host2_if=$4 + local mac=de:ad:be:ef:13:37 + local ageing_time + + RET=0 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? "Found FDB record when should not" + + # Disable unknown unicast flooding on `br_port1` to make sure + # packets are only forwarded through the port after a matching + # FDB entry was installed. 
+ bridge link set dev $br_port1 flood off + + tc qdisc add dev $host1_if ingress + tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q + sleep 1 + + tc -j -s filter show dev $host1_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_fail $? "Packet reached second host when should not" + + $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q + sleep 1 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_err $? "Did not find FDB record when should" + + $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q + sleep 1 + + tc -j -s filter show dev $host1_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet did not reach second host when should" + + # Wait for 10 seconds after the ageing time to make sure FDB + # record was aged-out. + ageing_time=$(bridge_ageing_time_get $bridge) + sleep $((ageing_time + 10)) + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? "Found FDB record when should not" + + bridge link set dev $br_port1 learning off + + $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q + sleep 1 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? 
"Found FDB record when should not" + + bridge link set dev $br_port1 learning on + + tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower + tc qdisc del dev $host1_if ingress + + bridge link set dev $br_port1 flood on + + log_test "FDB learning" +} + +flood_test_do() +{ + local should_flood=$1 + local mac=$2 + local ip=$3 + local host1_if=$4 + local host2_if=$5 + local err=0 + + # Add an ACL on `host2_if` which will tell us whether the packet + # was flooded to it or not. + tc qdisc add dev $host2_if ingress + tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q + sleep 1 + + tc -j -s filter show dev $host2_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + if [[ $? -ne 0 && $should_flood == "true" || \ + $? -eq 0 && $should_flood == "false" ]]; then + err=1 + fi + + tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower + tc qdisc del dev $host2_if ingress + + return $err +} + +flood_unicast_test() +{ + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + local mac=de:ad:be:ef:13:37 + local ip=192.0.2.100 + + RET=0 + + bridge link set dev $br_port flood off + + flood_test_do false $mac $ip $host1_if $host2_if + check_err $? "Packet flooded when should not" + + bridge link set dev $br_port flood on + + flood_test_do true $mac $ip $host1_if $host2_if + check_err $? "Packet was not flooded when should" + + log_test "Unknown unicast flood" +} + +flood_multicast_test() +{ + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + local mac=01:00:5e:00:00:01 + local ip=239.0.0.1 + + RET=0 + + bridge link set dev $br_port mcast_flood off + + flood_test_do false $mac $ip $host1_if $host2_if + check_err $? 
"Packet flooded when should not" + + bridge link set dev $br_port mcast_flood on + + flood_test_do true $mac $ip $host1_if $host2_if + check_err $? "Packet was not flooded when should" + + log_test "Unregistered multicast flood" +} + +flood_test() +{ + # `br_port` is connected to `host2_if` + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + + flood_unicast_test $br_port $host1_if $host2_if + flood_multicast_test $br_port $host1_if $host2_if +} diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh new file mode 100755 index 000000000000..cc6a14abfa87 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + 
ip link set dev $rp1 up + ip link set dev $rp2 up + + ip address add 192.0.2.1/24 dev $rp1 + ip address add 2001:db8:1::1/64 dev $rp1 + + ip address add 198.51.100.1/24 dev $rp2 + ip address add 2001:db8:2::1/64 dev $rp2 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 198.51.100.1/24 dev $rp2 + + ip address del 2001:db8:1::1/64 dev $rp1 + ip address del 192.0.2.1/24 dev $rp1 + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 198.51.100.2 +ping6_test $h1 2001:db8:2::2 + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh new file mode 100755 index 000000000000..d31888e3133e --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -0,0 +1,322 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up 
+ + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router1_create() +{ + vrf_create "vrf-r1" + ip link set dev $rp11 master vrf-r1 + ip link set dev $rp12 master vrf-r1 + ip link set dev $rp13 master vrf-r1 + + ip link set dev vrf-r1 up + ip link set dev $rp11 up + ip link set dev $rp12 up + ip link set dev $rp13 up + + ip address add 192.0.2.1/24 dev $rp11 + ip address add 2001:db8:1::1/64 dev $rp11 + + ip address add 169.254.2.12/24 dev $rp12 + ip address add fe80:2::12/64 dev $rp12 + + ip address add 169.254.3.13/24 dev $rp13 + ip address add fe80:3::13/64 dev $rp13 + + ip route add 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 \ + nexthop via 169.254.3.23 dev $rp13 + ip route add 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 \ + nexthop via fe80:3::23 dev $rp13 +} + +router1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-r1 + ip route del 198.51.100.0/24 vrf vrf-r1 + + ip address del fe80:3::13/64 dev $rp13 + ip address del 169.254.3.13/24 dev $rp13 + + ip address del fe80:2::12/64 dev $rp12 + ip address del 169.254.2.12/24 dev $rp12 + + ip address del 2001:db8:1::1/64 dev $rp11 + ip address del 192.0.2.1/24 dev $rp11 + + ip link set dev $rp13 down + ip link set dev $rp12 down + ip link set dev $rp11 down + + vrf_destroy "vrf-r1" +} + +router2_create() +{ + vrf_create "vrf-r2" + ip link set dev $rp21 master vrf-r2 + ip link set dev $rp22 master vrf-r2 + ip link set dev $rp23 master vrf-r2 + + ip link set dev vrf-r2 up + ip link set dev $rp21 up + ip link set dev $rp22 up + ip link set dev $rp23 up + + ip address 
add 198.51.100.1/24 dev $rp21 + ip address add 2001:db8:2::1/64 dev $rp21 + + ip address add 169.254.2.22/24 dev $rp22 + ip address add fe80:2::22/64 dev $rp22 + + ip address add 169.254.3.23/24 dev $rp23 + ip address add fe80:3::23/64 dev $rp23 + + ip route add 192.0.2.0/24 vrf vrf-r2 \ + nexthop via 169.254.2.12 dev $rp22 \ + nexthop via 169.254.3.13 dev $rp23 + ip route add 2001:db8:1::/64 vrf vrf-r2 \ + nexthop via fe80:2::12 dev $rp22 \ + nexthop via fe80:3::13 dev $rp23 +} + +router2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-r2 + ip route del 192.0.2.0/24 vrf vrf-r2 + + ip address del fe80:3::23/64 dev $rp23 + ip address del 169.254.3.23/24 dev $rp23 + + ip address del fe80:2::22/64 dev $rp22 + ip address del 169.254.2.22/24 dev $rp22 + + ip address del 2001:db8:2::1/64 dev $rp21 + ip address del 198.51.100.1/24 dev $rp21 + + ip link set dev $rp23 down + ip link set dev $rp22 down + ip link set dev $rp21 down + + vrf_destroy "vrf-r2" +} + +multipath_eval() +{ + local weight_rp12=$1 + local weight_rp13=$2 + local packets_rp12=$3 + local packets_rp13=$4 + local weights_ratio packets_ratio diff + + RET=0 + + if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then + weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \ + | bc -l) + packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \ + | bc -l) + else + weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \ + bc -l) + packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \ + bc -l) + fi + + diff=$(echo $weights_ratio - $packets_ratio | bc -l) + diff=${diff#-} + + test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0 + check_err $? 
"Too large discrepancy between expected and measured ratios" + log_test "Multipath" + log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" +} + +multipath4_test() +{ + local weight_rp12=$1 + local weight_rp13=$2 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + local hash_policy + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the configured weights. + hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + ip route replace 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \ + nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + # Restore settings. + ip route replace 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 \ + nexthop via 169.254.3.23 dev $rp13 + sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy +} + +multipath6_test() +{ + local weight_rp12=$1 + local weight_rp13=$2 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ + nexthop via fe80:3::23 dev $rp13 weight $weight_rp13 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + # Generate 16384 echo requests, each with a random flow label. 
+ for _ in $(seq 1 16384); do + ip vrf exec vrf-h1 ping 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null + done + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 \ + nexthop via fe80:3::23 dev $rp13 +} + +multipath_test() +{ + log_info "Running IPv4 multipath tests" + multipath4_test 1 1 + multipath4_test 2 1 + multipath4_test 11 45 + + log_info "Running IPv6 multipath tests" + multipath6_test 1 1 + multipath6_test 2 1 + multipath6_test 11 45 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp13=${NETIFS[p5]} + rp23=${NETIFS[p6]} + + rp21=${NETIFS[p7]} + h2=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 198.51.100.2 +ping6_test $h1 2001:db8:2::2 +multipath_test + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh new file mode 100755 index 000000000000..84234317a25d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -0,0 +1,195 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 +} + +switch_create() +{ 
+ simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact + + simple_if_init $swp2 192.0.2.1/24 +} + +switch_destroy() +{ + simple_if_fini $swp2 192.0.2.1/24 + + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +mirred_egress_redirect_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched without redirect rule inserted" + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action mirred egress redirect \ + dev $swp2 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match incoming redirected packet" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "mirred egress redirect ($tcflags)" +} + +gact_drop_and_ok_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \ + skip_hw dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 102 1 + check_err $? "Packet was not dropped" + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action ok + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? 
"Did not see trapped packet" + + tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "gact drop and ok ($tcflags)" +} + +gact_trap_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 192.0.2.2 action drop + tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.2.2 action mirred egress redirect \ + dev $swp2 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 1 + check_fail $? "Saw packet without trap rule inserted" + + tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action trap + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 102 1 + check_err $? "Packet was not trapped" + + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "Did not see trapped packet" + + tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "trap ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + swp1origmac=$(mac_get $swp1) + swp2origmac=$(mac_get $swp2) + ip link set $swp1 address $h2mac + ip link set $swp2 address $h1mac + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + ip link set $swp2 address $swp2origmac + ip link set $swp1 address $swp1origmac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +gact_drop_and_ok_test +mirred_egress_redirect_test + +tc_offload_check +if [[ $? 
-ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + gact_drop_and_ok_test + mirred_egress_redirect_test + gact_trap_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh new file mode 100755 index 000000000000..94c114ad8b44 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=2 +source lib.sh +source tc_common.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 +} + +unreachable_chain_test() +{ + RET=0 + + tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower $tcflags dst_mac $h2mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 1101 1 + check_fail $? "matched on filter in unreachable chain" + + tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower + + log_test "unreachable chain ($tcflags)" +} + +gact_goto_chain_test() +{ + RET=0 + + tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower $tcflags dst_mac $h2mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_mac $h2mac action drop + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_mac $h2mac action goto chain 1 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? 
"Did not match on correct filter with goto chain action" + + tc_check_packets "dev $h2 ingress" 1101 1 + check_err $? "Did not match on correct filter in chain 1" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower + + log_test "gact goto chain ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +unreachable_chain_test +gact_goto_chain_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + unreachable_chain_test + gact_goto_chain_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh new file mode 100644 index 000000000000..acd0b520241c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +tc_check_packets() +{ + local id=$1 + local handle=$2 + local count=$3 + local ret + + output="$(tc -j -s filter show $id)" + # workaround the jq bug which causes jq to return 0 in case input is "" + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output | \ + jq -e ".[] \ + | select(.options.handle == $handle) \ + | select(.options.actions[0].stats.packets == $count)" \ + &> /dev/null + return $? 
+} diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh new file mode 100755 index 000000000000..026a4ea4b2fb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=2 +source lib.sh +source tc_common.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 198.51.100.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 198.51.100.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24 +} + +match_dst_mac_test() +{ + local dummy_mac=de:ad:be:ef:aa:aa + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_mac $dummy_mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_mac $h2mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "dst_mac match ($tcflags)" +} + +match_src_mac_test() +{ + local dummy_mac=de:ad:be:ef:aa:aa + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags src_mac $dummy_mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags src_mac $h1mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? 
"Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "src_mac match ($tcflags)" +} + +match_dst_ip_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.2.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on correct filter with mask" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + + log_test "dst_ip match ($tcflags)" +} + +match_src_ip_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags src_ip 198.51.100.1 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags src_ip 192.0.2.1 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? 
"Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on correct filter with mask" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + + log_test "src_ip match ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +match_dst_mac_test +match_src_mac_test +match_dst_ip_test +match_src_ip_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + match_dst_mac_test + match_src_mac_test + match_dst_ip_test + match_src_ip_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh new file mode 100755 index 000000000000..cfc8a2ace388 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.1/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.1/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact + + simple_if_init $swp2 192.0.2.2/24 + tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + simple_if_fini $swp2 192.0.2.2/24 + + 
tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +shared_block_test() +{ + RET=0 + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "block 22" 101 1 + check_err $? "Did not match first incoming packet on a block" + + $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "block 22" 101 2 + check_err $? "Did not match second incoming packet on a block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + log_test "shared block ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + swmac=$(mac_get $swp1) + swp2origmac=$(mac_get $swp2) + ip link set $swp2 address $swmac + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + ip link set $swp2 address $swp2origmac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +shared_block_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + shared_block_test +fi + +exit $EXIT_STATUS |