diff options
Diffstat (limited to 'include')
54 files changed, 965 insertions, 263 deletions
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 8ff86b4c1b8a..d3339dd48b1a 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -14,6 +14,7 @@ #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 +#define PHY_ID_BCM5395 0x0143bcf0 #define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index f48a85c377de..b6514c29563f 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -26,6 +26,7 @@ struct lan9303 { bool phy_addr_sel_strap; struct dsa_switch *ds; struct mutex indirect_mutex; /* protect indexed register access */ + struct mutex alr_mutex; /* protect ALR access */ const struct lan9303_phy_ops *ops; bool is_bridged; /* true if port 1 and 2 are bridged */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 80b5b482cb46..0062302e1285 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -985,6 +985,7 @@ struct bpf_sock_ops_kern { u32 reply; u32 replylong[4]; }; + u32 is_fullsock; }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6c9336626592..93bd6fcd6e62 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -127,28 +127,6 @@ struct hv_ring_buffer_info { u32 priv_read_index; }; -/* - * - * hv_get_ringbuffer_availbytes() - * - * Get number of bytes available to read and to write to - * for the specified ring buffer - */ -static inline void -hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, - u32 *read, u32 *write) -{ - u32 read_loc, write_loc, dsize; - - /* Capture the read/write indices before they changed */ - read_loc = rbi->ring_buffer->read_index; - write_loc = rbi->ring_buffer->write_index; - dsize = rbi->ring_datasize; - - *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : - read_loc - write_loc; - *read = dsize - *write; -} static inline u32 hv_get_bytes_to_read(const struct hv_ring_buffer_info *rbi) { diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index bedf54b6f943..4cb7aeeafce0 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -30,10 +30,10 @@ struct macvlan_dev { enum macvlan_mode mode; u16 flags; int nest_level; + unsigned int macaddr_count; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif - unsigned int macaddr_count; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, diff --git a/include/linux/mdio.h b/include/linux/mdio.h index ca08ab16ecdc..e37c21d8eb19 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -12,6 +12,7 @@ #include <uapi/linux/mdio.h> #include <linux/mod_devicetable.h> +struct gpio_desc; struct mii_bus; /* Multiple levels of nesting are possible. However typically this is @@ -39,6 +40,9 @@ struct mdio_device { /* Bus address of the MDIO device (0-31) */ int addr; int flags; + struct gpio_desc *reset; + unsigned int reset_delay; + unsigned int reset_post_delay; }; #define to_mdio_device(d) container_of(d, struct mdio_device, dev) @@ -71,6 +75,7 @@ void mdio_device_free(struct mdio_device *mdiodev); struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr); int mdio_device_register(struct mdio_device *mdiodev); void mdio_device_remove(struct mdio_device *mdiodev); +void mdio_device_reset(struct mdio_device *mdiodev, int value); int mdio_driver_register(struct mdio_driver *drv); void mdio_driver_unregister(struct mdio_driver *drv); int mdio_device_bus_match(struct device *dev, struct device_driver *drv); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef789e1d679e..cc4ce7456e38 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -820,6 +820,8 @@ struct netdev_bpf { struct { u8 prog_attached; u32 prog_id; + /* flags with which program was installed */ + u32 prog_flags; }; /* BPF_OFFLOAD_VERIFIER_PREP */ struct { @@ -3330,7 +3332,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t xdp_op, u32 *prog_id); +void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, + struct netdev_bpf *xdp); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/include/linux/pci.h b/include/linux/pci.h index c170c9250c8b..0314e0716c30 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1072,6 +1072,7 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); int pci_set_cacheline_size(struct pci_dev *dev); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); +int __must_check pcim_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); diff --git a/include/linux/phy.h b/include/linux/phy.h index dc82a07cb4fd..c4b4715caa21 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -59,6 +59,7 @@ #define PHY_HAS_INTERRUPT 0x00000001 #define PHY_IS_INTERNAL 0x00000002 +#define PHY_RST_AFTER_CLK_EN 0x00000004 #define MDIO_DEVICE_IS_PHY 0x80000000 /* Interface Mode definitions */ @@ -468,7 +469,6 @@ struct phy_device { /* Interrupt and Polling infrastructure */ struct work_struct phy_queue; struct delayed_work state_queue; - atomic_t irq_disable; struct mutex lock; @@ -497,19 +497,19 @@ struct phy_device { * flags: A bitfield defining certain other features this PHY * supports (like interrupts) * - * The drivers must implement config_aneg and read_status. All - * other functions are optional. Note that none of these - * functions should be called from interrupt time. The goal is - * for the bus read/write functions to be able to block when the - * bus transaction is happening, and be freed up by an interrupt - * (The MPC85xx has this ability, though it is not currently - * supported in the driver). + * All functions are optional. If config_aneg or read_status + * are not implemented, the phy core uses the genphy versions. + * Note that none of these functions should be called from + * interrupt time. The goal is for the bus read/write functions + * to be able to block when the bus transaction is happening, + * and be freed up by an interrupt (The MPC85xx has this ability, + * though it is not currently supported in the driver). */ struct phy_driver { struct mdio_driver_common mdiodrv; u32 phy_id; char *name; - unsigned int phy_id_mask; + u32 phy_id_mask; u32 features; u32 flags; const void *driver_data; @@ -763,6 +763,20 @@ static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode) }; /** + * phy_interface_mode_is_8023z() - does the phy interface mode use 802.3z + * negotiation + * @mode: one of &enum phy_interface_t + * + * Returns true if the phy interface mode uses the 16-bit negotiation + * word as defined in 802.3z. (See 802.3-2015 37.2.1 Config_Reg encoding) + */ +static inline bool phy_interface_mode_is_8023z(phy_interface_t mode) +{ + return mode == PHY_INTERFACE_MODE_1000BASEX || + mode == PHY_INTERFACE_MODE_2500BASEX; +} + +/** * phy_interface_is_rgmii - Convenience function for testing if a PHY interface * is RGMII (all variants) * @phydev: the phy_device struct @@ -840,13 +854,11 @@ int phy_aneg_done(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_restart_aneg(struct phy_device *phydev); +int phy_reset_after_clk_enable(struct phy_device *phydev); -static inline int phy_read_status(struct phy_device *phydev) +static inline void phy_device_reset(struct phy_device *phydev, int value) { - if (!phydev->drv) - return -EIO; - - return phydev->drv->read_status(phydev); + mdio_device_reset(&phydev->mdio, value); } #define phydev_err(_phydev, format, args...) \ @@ -890,6 +902,17 @@ int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); +static inline int phy_read_status(struct phy_device *phydev) +{ + if (!phydev->drv) + return -EIO; + + if (phydev->drv->read_status) + return phydev->drv->read_status(phydev); + else + return genphy_read_status(phydev); +} + void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index af67edd4ae38..bd137c273d38 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -7,6 +7,7 @@ struct device_node; struct ethtool_cmd; +struct fwnode_handle; struct net_device; enum { @@ -20,19 +21,31 @@ enum { MLO_AN_PHY = 0, /* Conventional PHY */ MLO_AN_FIXED, /* Fixed-link mode */ - MLO_AN_SGMII, /* Cisco SGMII protocol */ - MLO_AN_8023Z, /* 1000base-X protocol */ + MLO_AN_INBAND, /* In-band protocol */ }; static inline bool phylink_autoneg_inband(unsigned int mode) { - return mode == MLO_AN_SGMII || mode == MLO_AN_8023Z; + return mode == MLO_AN_INBAND; } +/** + * struct phylink_link_state - link state structure + * @advertising: ethtool bitmask containing advertised link modes + * @lp_advertising: ethtool bitmask containing link partner advertised link + * modes + * @interface: link &typedef phy_interface_t mode + * @speed: link speed, one of the SPEED_* constants. + * @duplex: link duplex mode, one of DUPLEX_* constants. + * @pause: link pause state, described by MLO_PAUSE_* constants. + * @link: true if the link is up. + * @an_enabled: true if autonegotiation is enabled/desired. + * @an_complete: true if autonegotiation has completed. + */ struct phylink_link_state { __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); - phy_interface_t interface; /* PHY_INTERFACE_xxx */ + phy_interface_t interface; int speed; int duplex; int pause; @@ -41,72 +54,145 @@ struct phylink_link_state { unsigned int an_complete:1; }; +/** + * struct phylink_mac_ops - MAC operations structure. + * @validate: Validate and update the link configuration. + * @mac_link_state: Read the current link state from the hardware. + * @mac_config: configure the MAC for the selected mode and state. + * @mac_an_restart: restart 802.3z BaseX autonegotiation. + * @mac_link_down: take the link down. + * @mac_link_up: allow the link to come up. + * + * The individual methods are described more fully below. + */ struct phylink_mac_ops { - /** - * validate: validate and update the link configuration - * @ndev: net_device structure associated with MAC - * @config: configuration to validate - * - * Update the %config->supported and %config->advertised masks - * clearing bits that can not be supported. - * - * Note: the PHY may be able to transform from one connection - * technology to another, so, eg, don't clear 1000BaseX just - * because the MAC is unable to support it. This is more about - * clearing unsupported speeds and duplex settings. - * - * If the %config->interface mode is %PHY_INTERFACE_MODE_1000BASEX - * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode - * based on %config->advertised and/or %config->speed. - */ void (*validate)(struct net_device *ndev, unsigned long *supported, struct phylink_link_state *state); - - /* Read the current link state from the hardware */ - int (*mac_link_state)(struct net_device *, struct phylink_link_state *); - - /* Configure the MAC */ - /** - * mac_config: configure the MAC for the selected mode and state - * @ndev: net_device structure for the MAC - * @mode: one of MLO_AN_FIXED, MLO_AN_PHY, MLO_AN_8023Z, MLO_AN_SGMII - * @state: state structure - * - * The action performed depends on the currently selected mode: - * - * %MLO_AN_FIXED, %MLO_AN_PHY: - * set the specified speed, duplex, pause mode, and phy interface - * mode in the provided @state. - * %MLO_AN_8023Z: - * place the link in 1000base-X mode, advertising the parameters - * given in advertising in @state. - * %MLO_AN_SGMII: - * place the link in Cisco SGMII mode - there is no advertisment - * to make as the PHY communicates the speed and duplex to the - * MAC over the in-band control word. Configuration of the pause - * mode is as per MLO_AN_PHY since this is not included. - */ + int (*mac_link_state)(struct net_device *ndev, + struct phylink_link_state *state); void (*mac_config)(struct net_device *ndev, unsigned int mode, const struct phylink_link_state *state); - - /** - * mac_an_restart: restart 802.3z BaseX autonegotiation - * @ndev: net_device structure for the MAC - */ void (*mac_an_restart)(struct net_device *ndev); - - void (*mac_link_down)(struct net_device *, unsigned int mode); - void (*mac_link_up)(struct net_device *, unsigned int mode, - struct phy_device *); + void (*mac_link_down)(struct net_device *ndev, unsigned int mode); + void (*mac_link_up)(struct net_device *ndev, unsigned int mode, + struct phy_device *phy); }; -struct phylink *phylink_create(struct net_device *, struct device_node *, +#if 0 /* For kernel-doc purposes only. */ +/** + * validate - Validate and update the link configuration + * @ndev: a pointer to a &struct net_device for the MAC. + * @supported: ethtool bitmask for supported link modes. + * @state: a pointer to a &struct phylink_link_state. + * + * Clear bits in the @supported and @state->advertising masks that + * are not supportable by the MAC. + * + * Note that the PHY may be able to transform from one connection + * technology to another, so, eg, don't clear 1000BaseX just + * because the MAC is unable to BaseX mode. This is more about + * clearing unsupported speeds and duplex settings. + * + * If the @state->interface mode is %PHY_INTERFACE_MODE_1000BASEX + * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode + * based on @state->advertising and/or @state->speed and update + * @state->interface accordingly. + */ +void validate(struct net_device *ndev, unsigned long *supported, + struct phylink_link_state *state); + +/** + * mac_link_state() - Read the current link state from the hardware + * @ndev: a pointer to a &struct net_device for the MAC. + * @state: a pointer to a &struct phylink_link_state. + * + * Read the current link state from the MAC, reporting the current + * speed in @state->speed, duplex mode in @state->duplex, pause mode + * in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits, + * negotiation completion state in @state->an_complete, and link + * up state in @state->link. + */ +int mac_link_state(struct net_device *ndev, + struct phylink_link_state *state); + +/** + * mac_config() - configure the MAC for the selected mode and state + * @ndev: a pointer to a &struct net_device for the MAC. + * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. + * @state: a pointer to a &struct phylink_link_state. + * + * The action performed depends on the currently selected mode: + * + * %MLO_AN_FIXED, %MLO_AN_PHY: + * Configure the specified @state->speed, @state->duplex and + * @state->pause (%MLO_PAUSE_TX / %MLO_PAUSE_RX) mode. + * + * %MLO_AN_INBAND: + * place the link in an inband negotiation mode (such as 802.3z + * 1000base-X or Cisco SGMII mode depending on the @state->interface + * mode). In both cases, link state management (whether the link + * is up or not) is performed by the MAC, and reported via the + * mac_link_state() callback. Changes in link state must be made + * by calling phylink_mac_change(). + * + * If in 802.3z mode, the link speed is fixed, dependent on the + * @state->interface. Duplex is negotiated, and pause is advertised + * according to @state->an_enabled, @state->pause and + * @state->advertising flags. Beware of MACs which only support full + * duplex at gigabit and higher speeds. + * + * If in Cisco SGMII mode, the link speed and duplex mode are passed + * in the serial bitstream 16-bit configuration word, and the MAC + * should be configured to read these bits and acknowledge the + * configuration word. Nothing is advertised by the MAC. The MAC is + * responsible for reading the configuration word and configuring + * itself accordingly. + */ +void mac_config(struct net_device *ndev, unsigned int mode, + const struct phylink_link_state *state); + +/** + * mac_an_restart() - restart 802.3z BaseX autonegotiation + * @ndev: a pointer to a &struct net_device for the MAC. + */ +void mac_an_restart(struct net_device *ndev); + +/** + * mac_link_down() - take the link down + * @ndev: a pointer to a &struct net_device for the MAC. + * @mode: link autonegotiation mode + * + * If @mode is not an in-band negotiation mode (as defined by + * phylink_autoneg_inband()), force the link down and disable any + * Energy Efficient Ethernet MAC configuration. + */ +void mac_link_down(struct net_device *ndev, unsigned int mode); + +/** + * mac_link_up() - allow the link to come up + * @ndev: a pointer to a &struct net_device for the MAC. + * @mode: link autonegotiation mode + * @phy: any attached phy + * + * If @mode is not an in-band negotiation mode (as defined by + * phylink_autoneg_inband()), allow the link to come up. If @phy + * is non-%NULL, configure Energy Efficient Ethernet by calling + * phy_init_eee() and perform appropriate MAC configuration for EEE. + */ +void mac_link_up(struct net_device *ndev, unsigned int mode, + struct phy_device *phy); +#endif + +struct phylink *phylink_create(struct net_device *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *ops); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); -int phylink_of_phy_connect(struct phylink *, struct device_node *); +int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags); void phylink_disconnect_phy(struct phylink *); +int phylink_fixed_state_cb(struct phylink *, + void (*cb)(struct net_device *dev, + struct phylink_link_state *)); void phylink_mac_change(struct phylink *, bool up); @@ -128,7 +214,6 @@ int phylink_ethtool_set_pauseparam(struct phylink *, int phylink_ethtool_get_module_info(struct phylink *, struct ethtool_modinfo *); int phylink_ethtool_get_module_eeprom(struct phylink *, struct ethtool_eeprom *, u8 *); -int phylink_init_eee(struct phylink *, bool); int phylink_get_eee_err(struct phylink *); int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 361c08e35dbc..c9df2527e0cd 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -207,6 +207,7 @@ struct rhashtable_iter { struct rhashtable_walker walker; unsigned int slot; unsigned int skip; + bool end_of_table; }; static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) @@ -239,34 +240,42 @@ static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); } -static inline unsigned int rht_key_hashfn( - struct rhashtable *ht, const struct bucket_table *tbl, - const void *key, const struct rhashtable_params params) +static inline unsigned int rht_key_get_hash(struct rhashtable *ht, + const void *key, const struct rhashtable_params params, + unsigned int hash_rnd) { unsigned int hash; /* params must be equal to ht->p if it isn't constant. */ if (!__builtin_constant_p(params.key_len)) - hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); + hash = ht->p.hashfn(key, ht->key_len, hash_rnd); else if (params.key_len) { unsigned int key_len = params.key_len; if (params.hashfn) - hash = params.hashfn(key, key_len, tbl->hash_rnd); + hash = params.hashfn(key, key_len, hash_rnd); else if (key_len & (sizeof(u32) - 1)) - hash = jhash(key, key_len, tbl->hash_rnd); + hash = jhash(key, key_len, hash_rnd); else - hash = jhash2(key, key_len / sizeof(u32), - tbl->hash_rnd); + hash = jhash2(key, key_len / sizeof(u32), hash_rnd); } else { unsigned int key_len = ht->p.key_len; if (params.hashfn) - hash = params.hashfn(key, key_len, tbl->hash_rnd); + hash = params.hashfn(key, key_len, hash_rnd); else - hash = jhash(key, key_len, tbl->hash_rnd); + hash = jhash(key, key_len, hash_rnd); } + return hash; +} + +static inline unsigned int rht_key_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const void *key, const struct rhashtable_params params) +{ + unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd); + return rht_bucket_index(tbl, hash); } @@ -378,8 +387,15 @@ void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); -int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU); + +static inline void rhashtable_walk_start(struct rhashtable_iter *iter) +{ + (void)rhashtable_walk_start_check(iter); +} + void *rhashtable_walk_next(struct rhashtable_iter *iter); +void *rhashtable_walk_peek(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); void rhashtable_free_and_destroy(struct rhashtable *ht, diff --git a/include/linux/sctp.h b/include/linux/sctp.h index da803dfc7a39..b36c76635f18 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -102,11 +102,15 @@ enum sctp_cid { /* AUTH Extension Section 4.1 */ SCTP_CID_AUTH = 0x0F, + /* sctp ndata 5.1. I-DATA */ + SCTP_CID_I_DATA = 0x40, + /* PR-SCTP Sec 3.2 */ SCTP_CID_FWD_TSN = 0xC0, /* Use hex, as defined in ADDIP sec. 3.1 */ SCTP_CID_ASCONF = 0xC1, + SCTP_CID_I_FWD_TSN = 0xC2, SCTP_CID_ASCONF_ACK = 0x80, SCTP_CID_RECONF = 0x82, }; /* enum */ @@ -240,6 +244,23 @@ struct sctp_data_chunk { struct sctp_datahdr data_hdr; }; +struct sctp_idatahdr { + __be32 tsn; + __be16 stream; + __be16 reserved; + __be32 mid; + union { + __u32 ppid; + __be32 fsn; + }; + __u8 payload[0]; +}; + +struct sctp_idata_chunk { + struct sctp_chunkhdr chunk_hdr; + struct sctp_idatahdr data_hdr; +}; + /* DATA Chuck Specific Flags */ enum { SCTP_DATA_MIDDLE_FRAG = 0x00, @@ -596,6 +617,22 @@ struct sctp_fwdtsn_chunk { struct sctp_fwdtsn_hdr fwdtsn_hdr; }; +struct sctp_ifwdtsn_skip { + __be16 stream; + __u8 reserved; + __u8 flags; + __be32 mid; +}; + +struct sctp_ifwdtsn_hdr { + __be32 new_cum_tsn; + struct sctp_ifwdtsn_skip skip[0]; +}; + +struct sctp_ifwdtsn_chunk { + struct sctp_chunkhdr chunk_hdr; + struct sctp_ifwdtsn_hdr fwdtsn_hdr; +}; /* ADDIP * Section 3.1.1 Address Configuration Change Chunk (ASCONF) diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 4a906f560817..0c5c5f6ae1ec 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -3,7 +3,7 @@ #include <linux/phy.h> -struct __packed sfp_eeprom_base { +struct sfp_eeprom_base { u8 phys_id; u8 phys_ext_id; u8 connector; @@ -166,12 +166,12 @@ struct __packed sfp_eeprom_base { union { __be16 optical_wavelength; u8 cable_spec; - }; + } __packed; u8 reserved62; u8 cc_base; -}; +} __packed; -struct __packed sfp_eeprom_ext { +struct sfp_eeprom_ext { __be16 options; u8 br_max; u8 br_min; @@ -181,12 +181,21 @@ struct __packed sfp_eeprom_ext { u8 enhopts; u8 sff8472_compliance; u8 cc_ext; -}; - -struct __packed sfp_eeprom_id { +} __packed; + +/** + * struct sfp_eeprom_id - raw SFP module identification information + * @base: base SFP module identification structure + * @ext: extended SFP module identification structure + * + * See the SFF-8472 specification and related documents for the definition + * of these structure members. This can be obtained from + * ftp://ftp.seagate.com/sff + */ +struct sfp_eeprom_id { struct sfp_eeprom_base base; struct sfp_eeprom_ext ext; -}; +} __packed; /* SFP EEPROM registers */ enum { @@ -222,6 +231,7 @@ enum { SFP_SFF8472_COMPLIANCE = 0x5e, SFP_CC_EXT = 0x5f, + SFP_PHYS_ID_SFF = 0x02, SFP_PHYS_ID_SFP = 0x03, SFP_PHYS_EXT_ID_SFP = 0x04, SFP_CONNECTOR_UNSPEC = 0x00, @@ -347,19 +357,32 @@ enum { SFP_PAGE = 0x7f, }; -struct device_node; +struct fwnode_handle; struct ethtool_eeprom; struct ethtool_modinfo; struct net_device; struct sfp_bus; +/** + * struct sfp_upstream_ops - upstream operations structure + * @module_insert: called after a module has been detected to determine + * whether the module is supported for the upstream device. + * @module_remove: called after the module has been removed. + * @link_down: called when the link is non-operational for whatever + * reason. + * @link_up: called when the link is operational. + * @connect_phy: called when an I2C accessible PHY has been detected + * on the module. + * @disconnect_phy: called when a module with an I2C accessible PHY has + * been removed. + */ struct sfp_upstream_ops { - int (*module_insert)(void *, const struct sfp_eeprom_id *id); - void (*module_remove)(void *); - void (*link_down)(void *); - void (*link_up)(void *); - int (*connect_phy)(void *, struct phy_device *); - void (*disconnect_phy)(void *); + int (*module_insert)(void *priv, const struct sfp_eeprom_id *id); + void (*module_remove)(void *priv); + void (*link_down)(void *priv); + void (*link_up)(void *priv); + int (*connect_phy)(void *priv, struct phy_device *); + void (*disconnect_phy)(void *priv); }; #if IS_ENABLED(CONFIG_SFP) @@ -375,7 +398,7 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, u8 *data); void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); -struct sfp_bus *sfp_register_upstream(struct device_node *np, +struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, struct net_device *ndev, void *upstream, const struct sfp_upstream_ops *ops); void sfp_unregister_upstream(struct sfp_bus *bus); @@ -419,7 +442,8 @@ static inline void sfp_upstream_stop(struct sfp_bus *bus) { } -static inline struct sfp_bus *sfp_register_upstream(struct device_node *np, +static inline struct sfp_bus *sfp_register_upstream( + struct fwnode_handle *fwnode, struct net_device *ndev, void *upstream, const struct sfp_upstream_ops *ops) { diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 8621ffdeecbf..c7addf37d119 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -72,6 +72,11 @@ static inline bool __skb_array_empty(struct skb_array *a) return !__ptr_ring_peek(&a->ring); } +static inline struct sk_buff *__skb_array_peek(struct skb_array *a) +{ + return __ptr_ring_peek(&a->ring); +} + static inline bool skb_array_empty(struct skb_array *a) { return ptr_ring_empty(&a->ring); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a38c80e9f91e..b8e0da6c27d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1211,6 +1211,11 @@ static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, data, proto, nhoff, hlen, flags); } +void +skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container); + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3bf273538840..4894d322d258 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -409,4 +409,10 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, + size_t max_size, unsigned int cpu_mult, + gfp_t gfp); + +void free_bucket_spinlocks(spinlock_t *locks); + #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ca4a6361389b..4f93f0953c41 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -344,7 +344,7 @@ struct tcp_sock { /* Receiver queue space */ struct { - int space; + u32 space; u32 seq; u64 time; } rcvq_space; diff --git a/include/net/act_api.h b/include/net/act_api.h index fd08df74c466..6ed9692f20bd 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -86,7 +86,7 @@ struct tc_action_ops { int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); - void (*cleanup)(struct tc_action *, int bind); + void (*cleanup)(struct tc_action *); int (*lookup)(struct net *, struct tc_action **, u32); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, @@ -120,12 +120,19 @@ int tc_action_net_init(struct tc_action_net *tn, void tcf_idrinfo_destroy(const struct tc_action_ops *ops, struct tcf_idrinfo *idrinfo); -static inline void tc_action_net_exit(struct tc_action_net *tn) +static inline void tc_action_net_exit(struct list_head *net_list, + unsigned int id) { + struct net *net; + rtnl_lock(); - tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + list_for_each_entry(net, net_list, exit_list) { + struct tc_action_net *tn = net_generic(net, id); + + tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + kfree(tn->idrinfo); + } rtnl_unlock(); - kfree(tn->idrinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b623b65a79d1..c4185a7b0e90 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -180,7 +180,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout) */ int ipv6_addr_label_init(void); void ipv6_addr_label_cleanup(void); -void ipv6_addr_label_rtnl_register(void); +int ipv6_addr_label_rtnl_register(void); u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex); diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 55df9939bca2..342d2503cba5 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -69,6 +69,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, */ struct dn_route { struct dst_entry dst; + struct dn_route __rcu *dn_next; struct neighbour *n; diff --git a/include/net/dsa.h b/include/net/dsa.h index 2a05738570d8..6cb602dd970c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,31 +296,39 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds) return mask; } -static inline u8 dsa_upstream_port(struct dsa_switch *ds) +/* Return the local port used to reach an arbitrary switch port */ +static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device, + int port) { - struct dsa_switch_tree *dst = ds->dst; - - /* - * If this is the root switch (i.e. the switch that connects - * to the CPU), return the cpu port number on this switch. - * Else return the (DSA) port number that connects to the - * switch that is one hop closer to the cpu. - */ - if (dst->cpu_dp->ds == ds) - return dst->cpu_dp->index; + if (device == ds->index) + return port; else - return ds->rtable[dst->cpu_dp->ds->index]; + return ds->rtable[device]; +} + +/* Return the local port used to reach the dedicated CPU port */ +static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port) +{ + const struct dsa_port *dp = dsa_to_port(ds, port); + const struct dsa_port *cpu_dp = dp->cpu_dp; + + if (!cpu_dp) + return port; + + return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index); } typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { +#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) /* * Legacy probing. */ const char *(*probe)(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv); +#endif enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port); @@ -412,12 +420,10 @@ struct dsa_switch_ops { */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, bool vlan_filtering); - int (*port_vlan_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); - void (*port_vlan_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + int (*port_vlan_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); + void (*port_vlan_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); /* @@ -433,12 +439,10 @@ struct dsa_switch_ops { /* * Multicast database */ - int (*port_mdb_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); - void (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); + int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); + void (*port_mdb_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); /* @@ -472,11 +476,20 @@ struct dsa_switch_driver { const struct dsa_switch_ops *ops; }; +#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) /* Legacy driver registration */ void register_switch_driver(struct dsa_switch_driver *type); void unregister_switch_driver(struct dsa_switch_driver *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); +#else +static inline void register_switch_driver(struct dsa_switch_driver *type) { } +static inline void unregister_switch_driver(struct dsa_switch_driver *type) { } +static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) +{ + return NULL; +} +#endif struct net_device *dsa_dev_to_net_device(struct device *dev); /* Keep inline for faster access in hot path */ diff --git a/include/net/dst.h b/include/net/dst.h index b091fd536098..33d2a5433924 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -34,13 +34,9 @@ struct sk_buff; struct dst_entry { struct net_device *dev; - struct rcu_head rcu_head; - struct dst_entry *child; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; - struct dst_entry *path; - struct dst_entry *from; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else @@ -59,8 +55,6 @@ struct dst_entry { #define DST_XFRM_QUEUE 0x0040 #define DST_METADATA 0x0080 - short error; - /* A non-zero value of dst->obsolete forces by-hand validation * of the route entry. Positive values are set by the generic * dst layer to indicate that the entry has been forcefully @@ -76,35 +70,24 @@ struct dst_entry { #define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ - unsigned short __pad3; -#ifdef CONFIG_IP_ROUTE_CLASSID - __u32 tclassid; -#else - __u32 __pad2; -#endif - -#ifdef CONFIG_64BIT - /* - * Align __refcnt to a 64 bytes alignment - * (L1_CACHE_SIZE would be too much) - */ - long __pad_to_align_refcnt[2]; -#endif /* * __refcnt wants to be on a different cache line from * input/output/ops or performance tanks badly */ - atomic_t __refcnt; /* client references */ +#ifdef CONFIG_64BIT + atomic_t __refcnt; /* 64-bit offset 64 */ +#endif int __use; unsigned long lastuse; struct lwtunnel_state *lwtstate; - union { - struct dst_entry *next; - struct rtable __rcu *rt_next; - struct rt6_info __rcu *rt6_next; - struct dn_route __rcu *dn_next; - }; + struct rcu_head rcu_head; + short error; + short __pad; + __u32 tclassid; +#ifndef CONFIG_64BIT + atomic_t __refcnt; /* 32-bit offset 64 */ +#endif }; struct dst_metrics { @@ -250,7 +233,7 @@ static inline void dst_hold(struct dst_entry *dst) { /* * If your kernel compilation stops here, please check - * __pad_to_align_refcnt declaration in struct dst_entry + * the placement of __refcnt in struct dst_entry */ BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); diff --git a/include/net/erspan.h b/include/net/erspan.h index ca94fc86865e..acdf6843095d 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -15,7 +15,7 @@ * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The * other fields are set to zero, so only a sequence number follows. * - * ERSPAN Type II header (8 octets [42:49]) + * ERSPAN Version 1 (Type II) header (8 octets [42:49]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -24,11 +24,29 @@ * | Reserved | Index | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * + * + * ERSPAN Version 2 (Type III) header (12 octets [42:49]) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ver | VLAN | COS |BSO|T| Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Timestamp | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SGT |P| FT | Hw ID |D|Gra|O| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Platform Specific SubHeader (8 octets, optional) + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Platf ID | Platform Specific Info | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Platform Specific Info | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB */ -#define ERSPAN_VERSION 0x1 - +#define ERSPAN_VERSION 0x1 /* ERSPAN type II */ #define VER_MASK 0xf000 #define VLAN_MASK 0x0fff #define COS_MASK 0xe000 @@ -37,6 +55,28 @@ #define ID_MASK 0x03ff #define INDEX_MASK 0xfffff +#define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/ +#define BSO_MASK EN_MASK +#define SGT_MASK 0xffff0000 +#define P_MASK 0x8000 +#define FT_MASK 0x7c00 +#define HWID_MASK 0x03f0 +#define DIR_MASK 0x0008 +#define GRA_MASK 0x0006 +#define O_MASK 0x0001 + +/* ERSPAN version 2 metadata header */ +struct erspan_md2 { + __be32 timestamp; + __be16 sgt; /* security group tag */ + __be16 flags; +#define P_OFFSET 15 +#define FT_OFFSET 10 +#define HWID_OFFSET 4 +#define DIR_OFFSET 3 +#define GRA_OFFSET 1 +}; + enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ @@ -44,18 +84,159 @@ enum erspan_encap_type { ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ }; +#define ERSPAN_V1_MDSIZE 4 +#define ERSPAN_V2_MDSIZE 8 struct erspan_metadata { - __be32 index; /* type II */ + union { + __be32 index; /* Version 1 (type II)*/ + struct erspan_md2 md2; /* Version 2 (type III) */ + } u; + int version; }; -struct erspanhdr { +struct erspan_base_hdr { __be16 ver_vlan; #define VER_OFFSET 12 __be16 session_id; #define COS_OFFSET 13 #define EN_OFFSET 11 +#define BSO_OFFSET EN_OFFSET #define T_OFFSET 10 - struct erspan_metadata md; }; +static inline int erspan_hdr_len(int version) +{ + return sizeof(struct erspan_base_hdr) + + (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); +} + +static inline u8 tos_to_cos(u8 tos) +{ + u8 dscp, cos; + + dscp = tos >> 2; + cos = dscp >> 3; + return cos; +} + +static inline void erspan_build_header(struct sk_buff *skb, + __be32 id, u32 index, + bool truncate, bool is_ipv4) +{ + struct ethhdr *eth = eth_hdr(skb); + enum erspan_encap_type enc_type; + struct erspan_base_hdr *ershdr; + struct erspan_metadata *ersmd; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + u8 tos; + + tos = is_ipv4 ? ip_hdr(skb)->tos : + (ipv6_hdr(skb)->priority << 4) + + (ipv6_hdr(skb)->flow_lbl[0] >> 4); + + enc_type = ERSPAN_ENCAP_NOVLAN; + + /* If mirrored packet has vlan tag, extract tci and + * perserve vlan header in the mirrored frame. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + enc_type = ERSPAN_ENCAP_INFRAME; + } + + skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); + ershdr = (struct erspan_base_hdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); + + /* Build base header */ + ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) | + (ERSPAN_VERSION << VER_OFFSET)); + ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) | + ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) | + (enc_type << EN_OFFSET & EN_MASK) | + ((truncate << T_OFFSET) & T_MASK)); + + /* Build metadata */ + ersmd = (struct erspan_metadata *)(ershdr + 1); + ersmd->u.index = htonl(index & INDEX_MASK); +} + +/* ERSPAN GRA: timestamp granularity + * 00b --> granularity = 100 microseconds + * 01b --> granularity = 100 nanoseconds + * 10b --> granularity = IEEE 1588 + * Here we only support 100 microseconds. + */ +static inline __be32 erspan_get_timestamp(void) +{ + u64 h_usecs; + ktime_t kt; + + kt = ktime_get_real(); + h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC); + + /* ERSPAN base header only has 32-bit, + * so it wraps around 4 days. + */ + return htonl((u32)h_usecs); +} + +static inline void erspan_build_header_v2(struct sk_buff *skb, + __be32 id, u8 direction, u16 hwid, + bool truncate, bool is_ipv4) +{ + struct ethhdr *eth = eth_hdr(skb); + struct erspan_base_hdr *ershdr; + struct erspan_metadata *md; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + u16 session_id; + u8 gra = 0; /* 100 usec */ + u8 bso = 0; /* Bad/Short/Oversized */ + u8 sgt = 0; + u8 tos; + + tos = is_ipv4 ? ip_hdr(skb)->tos : + (ipv6_hdr(skb)->priority << 4) + + (ipv6_hdr(skb)->flow_lbl[0] >> 4); + + /* Unlike v1, v2 does not have En field, + * so only extract vlan tci field. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + } + + skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); + ershdr = (struct erspan_base_hdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); + + /* Build base header */ + ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) | + (ERSPAN_VERSION2 << VER_OFFSET)); + session_id = (u16)(ntohl(id) & ID_MASK) | + ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) | + (bso << BSO_OFFSET & BSO_MASK) | + ((truncate << T_OFFSET) & T_MASK); + ershdr->session_id = htons(session_id); + + /* Build metadata */ + md = (struct erspan_metadata *)(ershdr + 1); + md->u.md2.timestamp = erspan_get_timestamp(); + md->u.md2.sgt = htons(sgt); + md->u.md2.flags = htons(((1 << P_OFFSET) & P_MASK) | + ((hwid << HWID_OFFSET) & HWID_MASK) | + ((direction << DIR_OFFSET) & DIR_MASK) | + ((gra << GRA_OFFSET) & GRA_MASK)); +} + #endif diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 304f7aa9cc01..0304ba2ae353 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -49,6 +49,9 @@ int gnet_stats_copy_rate_est(struct gnet_dump *d, int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen); +void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu_q, + const struct gnet_stats_queue *q, __u32 qlen); int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); int gnet_stats_finish_copy(struct gnet_dump *d); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 0358745ea059..8e1bf9ae4a5e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops { * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -101,6 +102,7 @@ struct inet_connection_sock { const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 2dbbbff5e1e3..9141e95529e7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -111,6 +111,7 @@ struct inet_bind_hashbucket { */ struct inet_listen_hashbucket { spinlock_t lock; + unsigned int count; struct hlist_head head; }; @@ -132,12 +133,13 @@ struct inet_hashinfo { /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ + struct kmem_cache *bind_bucket_cachep; struct inet_bind_hashbucket *bhash; - unsigned int bhash_size; - /* 4 bytes hole on 64 bit */ - struct kmem_cache *bind_bucket_cachep; + /* The 2nd listener table hashed by local port and address */ + unsigned int lhash2_mask; + struct inet_listen_hashbucket *lhash2; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -145,14 +147,25 @@ struct inet_hashinfo { * Now align to a new cache line as all the following members * might be often dirty. */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. + /* All sockets in TCP_LISTEN state will be in listening_hash. + * This is the only table where wildcard'd TCP sockets can + * exist. listening_hash is only hashed by local port number. + * If lhash2 is initialized, the same socket will also be hashed + * to lhash2 by port and address. */ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; }; +#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ + hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) + +static inline struct inet_listen_hashbucket * +inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) +{ + return &h->lhash2[hash & h->lhash2_mask]; +} + static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) @@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); void inet_hashinfo_init(struct inet_hashinfo *h); +void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + unsigned long numentries, int scale, + unsigned long low_limit, + unsigned long high_limit); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 1356fa6a7566..899495589a7e 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -93,8 +93,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, const int state); -void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo); +void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + struct inet_hashinfo *hashinfo); void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); diff --git a/include/net/ip.h b/include/net/ip.h index af8addbaa3c1..746abff9ce51 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -26,12 +26,14 @@ #include <linux/ip.h> #include <linux/in.h> #include <linux/skbuff.h> +#include <linux/jhash.h> #include <net/inet_sock.h> #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/netns/hash.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ @@ -522,6 +524,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip) return (__force unsigned int) ip; } +static inline u32 ipv4_portaddr_hash(const struct net *net, + __be32 saddr, + unsigned int port) +{ + return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; +} + bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 10c913816032..44d96a91e745 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -129,6 +129,8 @@ struct rt6_exception { struct rt6_info { struct dst_entry dst; + struct rt6_info __rcu *rt6_next; + struct rt6_info *from; /* * Tail elements of dst_entry (__refcnt etc.) @@ -176,11 +178,11 @@ struct rt6_info { #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ - rt = rcu_dereference(rt->dst.rt6_next)) + rt = rcu_dereference(rt->rt6_next)) #define for_each_fib6_walker_rt(w) \ for (rt = (w)->leaf; rt; \ - rt = rcu_dereference_protected(rt->dst.rt6_next, 1)) + rt = rcu_dereference_protected(rt->rt6_next, 1)) static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { @@ -203,11 +205,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) { struct rt6_info *rt; - for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); - rt = (struct rt6_info *)rt->dst.from); + for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from); if (rt && rt != rt0) rt0->dst.expires = rt->dst.expires; - dst_set_expires(&rt0->dst, timeout); rt0->rt6i_flags |= RTF_EXPIRES; } @@ -242,8 +242,8 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt) u32 cookie = 0; if (rt->rt6i_flags & RTF_PCPU || - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) - rt = (struct rt6_info *)(rt->dst.from); + (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) + rt = rt->from; rt6_get_cookie_safe(rt, &cookie); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index d66f70f63734..236e40ba06bf 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -36,6 +36,10 @@ struct __ip6_tnl_parm { __be32 o_key; __u32 fwmark; + __u32 index; /* ERSPAN type II index */ + __u8 erspan_ver; /* ERSPAN version */ + __u8 dir; /* direction */ + __u16 hwid; /* hwid */ }; /* IPv6 tunnel */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 24628f6b09bf..1f16773cfd76 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -116,8 +116,11 @@ struct ip_tunnel { u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ - /* This field used only by ERSPAN */ + /* These four fields used only by ERSPAN */ u32 index; /* ERSPAN type II index */ + u8 erspan_ver; /* ERSPAN version */ + u8 dir; /* ERSPAN direction */ + u16 hwid; /* ERSPAN hardware ID */ struct dst_cache dst_cache; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f73797e2fa60..25be4715578c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -22,6 +22,7 @@ #include <net/flow.h> #include <net/flow_dissector.h> #include <net/snmp.h> +#include <net/netns/hash.h> #define SIN6_LEN_RFC2133 24 @@ -673,6 +674,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } +static inline u32 ipv6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) +{ + unsigned int hash, mix = net_hash_mix(net); + + if (ipv6_addr_any(addr6)) + hash = jhash_1word(0, mix); + else if (ipv6_addr_v4mapped(addr6)) + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); + else + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); + + return hash ^ port; +} + /* * Check for a RFC 4843 ORCHID address * (Overlay Routable Cryptographic Hash Identifiers) diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index ebc813277662..0db7fb3e4e15 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -122,9 +122,12 @@ struct netns_sctp { /* Flag to indicate if PR-CONFIG is enabled. */ int reconf_enable; - /* Flag to idicate if SCTP-AUTH is enabled */ + /* Flag to indicate if SCTP-AUTH is enabled */ int auth_enable; + /* Flag to indicate if stream interleave is enabled */ + int intl_enable; + /* * Policy to control SCTP IPv4 address scoping * 0 - Disable IPv4 address scoping diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d1f413f06c72..240469228851 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -105,16 +105,18 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); -int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, - struct net_device *dev, struct netdev_queue *txq, - spinlock_t *root_lock, bool validate); +bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, struct netdev_queue *txq, + spinlock_t *root_lock, bool validate); void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { - if (qdisc_run_begin(q)) + if (qdisc_run_begin(q)) { __qdisc_run(q); + qdisc_run_end(q); + } } static inline __be16 tc_skb_protocol(const struct sk_buff *skb) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index ead018744ff5..14b6b3af8918 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -13,10 +13,10 @@ enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = 1, }; -int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); +int rtnl_register_module(struct module *owner, int protocol, int msgtype, + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 83a3e47d5845..bc6b25faba99 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -71,6 +71,7 @@ struct Qdisc { * qdisc_tree_decrease_qlen() should stop. */ #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ +#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ u32 limit; const struct Qdisc_ops *ops; @@ -88,14 +89,14 @@ struct Qdisc { /* * For performance sake on SMP, we put highly modified fields at the end */ - struct sk_buff *gso_skb ____cacheline_aligned_in_smp; + struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; - struct sk_buff *skb_bad_txq; + struct sk_buff_head skb_bad_txq; int padded; refcount_t refcnt; @@ -162,7 +163,8 @@ struct Qdisc_class_ops { void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ - struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); + struct tcf_block * (*tcf_block)(struct Qdisc *sch, + unsigned long arg); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); @@ -179,6 +181,7 @@ struct Qdisc_ops { const struct Qdisc_class_ops *cl_ops; char id[IFNAMSIZ]; int priv_size; + unsigned int static_flags; int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, @@ -186,11 +189,12 @@ struct Qdisc_ops { struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); - int (*init)(struct Qdisc *, struct nlattr *arg); + int (*init)(struct Qdisc *sch, struct nlattr *arg); void (*reset)(struct Qdisc *); void (*destroy)(struct Qdisc *); - int (*change)(struct Qdisc *, struct nlattr *arg); - void (*attach)(struct Qdisc *); + int (*change)(struct Qdisc *sch, + struct nlattr *arg); + void (*attach)(struct Qdisc *sch); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); @@ -279,7 +283,6 @@ struct tcf_block { struct net *net; struct Qdisc *q; struct list_head cb_list; - struct work_struct work; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) @@ -290,11 +293,31 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } +static inline int qdisc_qlen_cpu(const struct Qdisc *q) +{ + return this_cpu_ptr(q->cpu_qstats)->qlen; +} + static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; } +static inline int qdisc_qlen_sum(const struct Qdisc *q) +{ + __u32 qlen = 0; + int i; + + if (q->flags & TCQ_F_NOLOCK) { + for_each_possible_cpu(i) + qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; + } else { + qlen = q->q.qlen; + } + + return qlen; +} + static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb) { return (struct qdisc_skb_cb *)skb->cb; @@ -631,12 +654,39 @@ static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch, sch->qstats.backlog -= qdisc_pkt_len(skb); } +static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch, + const struct sk_buff *skb) +{ + this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); +} + static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch, const struct sk_buff *skb) { sch->qstats.backlog += qdisc_pkt_len(skb); } +static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch, + const struct sk_buff *skb) +{ + this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); +} + +static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch) +{ + this_cpu_inc(sch->cpu_qstats->qlen); +} + +static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch) +{ + this_cpu_dec(sch->cpu_qstats->qlen); +} + +static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) +{ + this_cpu_inc(sch->cpu_qstats->requeues); +} + static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) { sch->qstats.drops += count; @@ -767,26 +817,30 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) /* generic pseudo peek method for non-work-conserving qdisc */ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) { + struct sk_buff *skb = skb_peek(&sch->gso_skb); + /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ - if (!sch->gso_skb) { - sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) { + if (!skb) { + skb = sch->dequeue(sch); + + if (skb) { + __skb_queue_head(&sch->gso_skb, skb); /* it's still part of the queue */ - qdisc_qstats_backlog_inc(sch, sch->gso_skb); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } } - return sch->gso_skb; + return skb; } /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { - struct sk_buff *skb = sch->gso_skb; + struct sk_buff *skb = skb_peek(&sch->gso_skb); if (skb) { - sch->gso_skb = NULL; + skb = __skb_dequeue(&sch->gso_skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { @@ -844,6 +898,14 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) qdisc_qstats_drop(sch); } +static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + __qdisc_drop(skb, to_free); + qdisc_qstats_cpu_drop(sch); + + return NET_XMIT_DROP; +} static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index deaafa9b09cb..20ff237c5eb2 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -145,12 +145,13 @@ SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA) +#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \ + a->chunk_hdr->type == SCTP_CID_I_DATA) /* Calculate the actual data size in a data chunk */ -#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\ - - (unsigned long)(c->chunk_hdr)\ - - sizeof(struct sctp_data_chunk))) +#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \ + (unsigned long)(c->chunk_hdr) - \ + sctp_datachk_len(&c->asoc->stream))) /* Internal error codes */ enum sctp_ierror { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 906a9c0efa71..20c0c1be2ca7 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -116,7 +116,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); -int sctp_transport_walk_start(struct rhashtable_iter *iter); +void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); @@ -444,13 +444,13 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) int frag = pmtu; frag -= sp->pf->af->net_header_len; - frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk); + frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream); if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - - sizeof(struct sctp_data_chunk))); + sctp_datachk_len(&asoc->stream))); return frag; } diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 70fb397f65b0..2883c43c5258 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -197,10 +197,14 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const __u32 lowest_tsn, const struct sctp_chunk *chunk); -struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, +struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc, + __u8 flags, int paylen, gfp_t gfp); +struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc, + __u32 new_cum_tsn, size_t nstreams, + struct sctp_ifwdtsn_skip *skiplist); +struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, const struct sctp_sndrcvinfo *sinfo, - int len, const __u8 flags, - __u16 ssn, gfp_t gfp); + int len, __u8 flags, gfp_t gfp); struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn); struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); @@ -342,7 +346,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sizeof(struct sctp_data_chunk); + size -= sctp_datahdr_len(&chunk->asoc->stream); return size; } @@ -358,6 +362,12 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) typecheck(__u32, b) && \ ((__s32)((a) - (b)) <= 0)) +/* Compare two MIDs */ +#define MID_lt(a, b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) < 0)) + /* Compare two SSNs */ #define SSN_lt(a,b) \ (typecheck(__u16, a) && \ diff --git a/include/net/sctp/stream_interleave.h b/include/net/sctp/stream_interleave.h new file mode 100644 index 000000000000..6657711c8bc4 --- /dev/null +++ b/include/net/sctp/stream_interleave.h @@ -0,0 +1,61 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * These are definitions used by the stream schedulers, defined in RFC + * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11) + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresses: + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Xin Long <lucien.xin@gmail.com> + */ + +#ifndef __sctp_stream_interleave_h__ +#define __sctp_stream_interleave_h__ + +struct sctp_stream_interleave { + __u16 data_chunk_len; + __u16 ftsn_chunk_len; + /* (I-)DATA process */ + struct sctp_chunk *(*make_datafrag)(const struct sctp_association *asoc, + const struct sctp_sndrcvinfo *sinfo, + int len, __u8 flags, gfp_t gfp); + void (*assign_number)(struct sctp_chunk *chunk); + bool (*validate_data)(struct sctp_chunk *chunk); + int (*ulpevent_data)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk, gfp_t gfp); + int (*enqueue_event)(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event); + void (*renege_events)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk, gfp_t gfp); + void (*start_pd)(struct sctp_ulpq *ulpq, gfp_t gfp); + void (*abort_pd)(struct sctp_ulpq *ulpq, gfp_t gfp); + /* (I-)FORWARD-TSN process */ + void (*generate_ftsn)(struct sctp_outq *q, __u32 ctsn); + bool (*validate_ftsn)(struct sctp_chunk *chunk); + void (*report_ftsn)(struct sctp_ulpq *ulpq, __u32 ftsn); + void (*handle_ftsn)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk); +}; + +void sctp_stream_interleave_init(struct sctp_stream *stream); + +#endif /* __sctp_stream_interleave_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2f8f93da5dc2..8ac4d5cdbfed 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -89,6 +89,7 @@ struct sctp_stream; #include <net/sctp/tsnmap.h> #include <net/sctp/ulpevent.h> #include <net/sctp/ulpqueue.h> +#include <net/sctp/stream_interleave.h> /* Structures useful for managing bind/connect. */ @@ -217,6 +218,7 @@ struct sctp_sock { disable_fragments:1, v4mapped:1, frag_interleave:1, + strm_interleave:1, recvrcvinfo:1, recvnxtinfo:1, data_ready_signalled:1; @@ -397,6 +399,28 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); #define sctp_ssn_skip(stream, type, sid, ssn) \ ((stream)->type[sid].ssn = ssn + 1) +/* What is the current MID number for this stream? */ +#define sctp_mid_peek(stream, type, sid) \ + ((stream)->type[sid].mid) + +/* Return the next MID number for this stream. */ +#define sctp_mid_next(stream, type, sid) \ + ((stream)->type[sid].mid++) + +/* Skip over this mid and all below. */ +#define sctp_mid_skip(stream, type, sid, mid) \ + ((stream)->type[sid].mid = mid + 1) + +#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid]) + +/* What is the current MID_uo number for this stream? */ +#define sctp_mid_uo_peek(stream, type, sid) \ + ((stream)->type[sid].mid_uo) + +/* Return the next MID_uo number for this stream. */ +#define sctp_mid_uo_next(stream, type, sid) \ + ((stream)->type[sid].mid_uo++) + /* * Pointers to address related SCTP functions. * (i.e. things that depend on the address family.) @@ -574,6 +598,8 @@ struct sctp_chunk { struct sctp_addiphdr *addip_hdr; struct sctp_fwdtsn_hdr *fwdtsn_hdr; struct sctp_authhdr *auth_hdr; + struct sctp_idatahdr *idata_hdr; + struct sctp_ifwdtsn_hdr *ifwdtsn_hdr; } subh; __u8 *chunk_end; @@ -620,6 +646,7 @@ struct sctp_chunk { __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ +#define has_mid has_ssn singleton:1, /* Only chunk in the packet? */ end_of_packet:1, /* Last chunk in the packet? */ ecn_ce_done:1, /* Have we processed the ECN CE bit? */ @@ -1073,6 +1100,7 @@ void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, int msg_len); +void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); /* Uncork and flush an outqueue. */ static inline void sctp_outq_cork(struct sctp_outq *q) { @@ -1357,13 +1385,25 @@ struct sctp_stream_out_ext { }; struct sctp_stream_out { - __u16 ssn; - __u8 state; + union { + __u32 mid; + __u16 ssn; + }; + __u32 mid_uo; struct sctp_stream_out_ext *ext; + __u8 state; }; struct sctp_stream_in { - __u16 ssn; + union { + __u32 mid; + __u16 ssn; + }; + __u32 mid_uo; + __u32 fsn; + __u32 fsn_uo; + char pd_mode; + char pd_mode_uo; }; struct sctp_stream { @@ -1387,11 +1427,32 @@ struct sctp_stream { struct sctp_stream_out_ext *rr_next; }; }; + struct sctp_stream_interleave *si; }; #define SCTP_STREAM_CLOSED 0x00 #define SCTP_STREAM_OPEN 0x01 +static inline __u16 sctp_datachk_len(const struct sctp_stream *stream) +{ + return stream->si->data_chunk_len; +} + +static inline __u16 sctp_datahdr_len(const struct sctp_stream *stream) +{ + return stream->si->data_chunk_len - sizeof(struct sctp_chunkhdr); +} + +static inline __u16 sctp_ftsnchk_len(const struct sctp_stream *stream) +{ + return stream->si->ftsn_chunk_len; +} + +static inline __u16 sctp_ftsnhdr_len(const struct sctp_stream *stream) +{ + return stream->si->ftsn_chunk_len - sizeof(struct sctp_chunkhdr); +} + /* SCTP_GET_ASSOC_STATS counters */ struct sctp_priv_assoc_stats { /* Maximum observed rto in the association during subsequent @@ -1940,6 +2001,7 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? */ force_delay:1, + intl_enable:1, prsctp_enable:1, reconf_enable:1; diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 231dc42f1da6..51b4e0626c34 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -45,19 +45,29 @@ /* A structure to carry information to the ULP (e.g. Sockets API) */ /* Warning: This sits inside an skb.cb[] area. Be very careful of * growing this structure as it is at the maximum limit now. + * + * sctp_ulpevent is saved in sk->cb(48 bytes), whose last 4 bytes + * have been taken by sock_skb_cb, So here it has to use 'packed' + * to make sctp_ulpevent fit into the rest 44 bytes. */ struct sctp_ulpevent { struct sctp_association *asoc; struct sctp_chunk *chunk; unsigned int rmem_len; - __u32 ppid; + union { + __u32 mid; + __u16 ssn; + }; + union { + __u32 ppid; + __u32 fsn; + }; __u32 tsn; __u32 cumtsn; __u16 stream; - __u16 ssn; __u16 flags; __u16 msg_flags; -}; +} __packed; /* Retrieve the skb this event sits inside of. */ static inline struct sk_buff *sctp_event2skb(const struct sctp_ulpevent *ev) @@ -112,7 +122,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, - __u32 indication, gfp_t gfp); + __u32 indication, __u32 sid, __u32 seq, + __u32 flags, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication( const struct sctp_association *asoc, gfp_t gfp); @@ -140,6 +151,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( const struct sctp_association *asoc, __u16 flags, __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp); +struct sctp_ulpevent *sctp_make_reassembled_event( + struct net *net, struct sk_buff_head *queue, + struct sk_buff *f_frag, struct sk_buff *l_frag); + void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index e0dce07b8794..bb0ecba3db2b 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -45,6 +45,7 @@ struct sctp_ulpq { char pd_mode; struct sctp_association *asoc; struct sk_buff_head reasm; + struct sk_buff_head reasm_uo; struct sk_buff_head lobby; }; @@ -76,11 +77,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc); void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn); void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *, __u32); -#endif /* __sctp_ulpqueue_h__ */ - - - - - +__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, + struct sk_buff_head *list, __u16 needed); +#endif /* __sctp_ulpqueue_h__ */ diff --git a/include/net/sock.h b/include/net/sock.h index 9155da422692..9a9047268d37 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2407,4 +2407,15 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) return *proto->sysctl_rmem; } +/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) + * Some wifi drivers need to tweak it to get more chunks. + * They can use this helper from their ndo_start_xmit() + */ +static inline void sk_pacing_shift_update(struct sock *sk, int val) +{ + if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + return; + sk->sk_pacing_shift = val; +} + #endif /* _SOCK_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 21d253c9a8c6..a2e9cbca5c9e 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,10 +8,8 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; - int tcfm_ifindex; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; - struct net *net; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) @@ -34,9 +32,9 @@ static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) return false; } -static inline int tcf_mirred_ifindex(const struct tc_action *a) +static inline struct net_device *tcf_mirred_dev(const struct tc_action *a) { - return to_mirred(a)->tcfm_ifindex; + return rtnl_dereference(to_mirred(a)->tcfm_dev); } #endif /* __NET_TC_MIR_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6da880d2f022..6939e69d3c37 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1507,8 +1507,7 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, /* From tcp_fastopen.c */ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, int *syn_loss, - unsigned long *last_syn_loss); + struct tcp_fastopen_cookie *cookie); void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_cookie *cookie, bool syn_lost, u16 try_exp); @@ -1546,7 +1545,7 @@ extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); -void tcp_fastopen_active_timeout_reset(void); +void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired); /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. @@ -2011,10 +2010,12 @@ static inline int tcp_call_bpf(struct sock *sk, int op) struct bpf_sock_ops_kern sock_ops; int ret; - if (sk_fullsock(sk)) + memset(&sock_ops, 0, sizeof(sock_ops)); + if (sk_fullsock(sk)) { + sock_ops.is_fullsock = 1; sock_owned_by_me(sk); + } - memset(&sock_ops, 0, sizeof(sock_ops)); sock_ops.sk = sk; sock_ops.op = op; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dc28a98ce97c..1ec0c4760646 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -968,7 +968,7 @@ static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_c /* A struct encoding bundle of transformations to apply to some set of flow. * - * dst->child points to the next element of bundle. + * xdst->child points to the next element of bundle. * dst->xfrm points to an instanse of transformer. * * Due to unfortunate limitations of current routing cache, which we @@ -984,6 +984,8 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; + struct dst_entry *child; + struct dst_entry *path; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -994,7 +996,35 @@ struct xfrm_dst { u32 path_cookie; }; +static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) +{ #ifdef CONFIG_XFRM + if (dst->xfrm) { + const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst; + + return xdst->path; + } +#endif + return (struct dst_entry *) dst; +} + +static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst) +{ +#ifdef CONFIG_XFRM + if (dst->xfrm) { + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + return xdst->child; + } +#endif + return NULL; +} + +#ifdef CONFIG_XFRM +static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child) +{ + xdst->child = child; +} + static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) { xfrm_pols_put(xdst->pols, xdst->num_pols); @@ -1866,12 +1896,14 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) { struct xfrm_state *x = dst->xfrm; + struct xfrm_dst *xdst; if (!x || !x->type_offload) return false; - if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) && - !dst->child->xfrm) + xdst = (struct xfrm_dst *) dst; + if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) && + !xdst->child->xfrm) return true; return false; diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h index 1bee3e7fdf32..8ea966448b58 100644 --- a/include/trace/events/bridge.h +++ b/include/trace/events/bridge.h @@ -82,8 +82,8 @@ TRACE_EVENT(fdb_delete, TP_fast_assign( __assign_str(br_dev, br->dev->name); __assign_str(dev, f->dst ? f->dst->dev->name : "null"); - memcpy(__entry->addr, f->addr.addr, ETH_ALEN); - __entry->vid = f->vlan_id; + memcpy(__entry->addr, f->key.addr.addr, ETH_ALEN); + __entry->vid = f->key.vlan_id; ), TP_printk("br_dev %s dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u", diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4c223ab30293..80d62e88590c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -941,6 +941,12 @@ struct bpf_sock_ops { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ }; /* List of known BPF sock_ops operators. diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index ac71559314e7..44a0b675a6bc 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1686,6 +1686,7 @@ enum ethtool_reset_flags { ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */ ETH_RESET_RAM = 1 << 7, /* RAM shared between * multiple components */ + ETH_RESET_AP = 1 << 8, /* Application processor */ ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to * this interface */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 3ee3bf7c8526..87b7529fcdfe 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -47,6 +47,7 @@ #define ETH_P_PUP 0x0200 /* Xerox PUP packet */ #define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */ #define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */ +#define ETH_P_ERSPAN2 0x22EB /* ERSPAN version 2 (type III) */ #define ETH_P_IP 0x0800 /* Internet Protocol packet */ #define ETH_P_X25 0x0805 /* CCITT X.25 */ #define ETH_P_ARP 0x0806 /* Address Resolution packet */ diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 030d3e6d6029..fb38c1797131 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -57,6 +57,7 @@ */ #define TUNSETVNETBE _IOW('T', 222, int) #define TUNGETVNETBE _IOR('T', 223, int) +#define TUNSETSTEERINGEBPF _IOR('T', 224, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index e68dadbd6d45..1b3d148c4560 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -137,6 +137,9 @@ enum { IFLA_GRE_IGNORE_DF, IFLA_GRE_FWMARK, IFLA_GRE_ERSPAN_INDEX, + IFLA_GRE_ERSPAN_VER, + IFLA_GRE_ERSPAN_DIR, + IFLA_GRE_ERSPAN_HWID, __IFLA_GRE_MAX, }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index d9adab32dbee..4c4db14786bd 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -125,6 +125,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_PEELOFF_FLAGS 122 #define SCTP_STREAM_SCHEDULER 123 #define SCTP_STREAM_SCHEDULER_VALUE 124 +#define SCTP_INTERLEAVING_SUPPORTED 125 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -459,6 +460,8 @@ struct sctp_pdapi_event { __u32 pdapi_length; __u32 pdapi_indication; sctp_assoc_t pdapi_assoc_id; + __u32 pdapi_stream; + __u32 pdapi_seq; }; enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, }; |