diff options
98 files changed, 2387 insertions, 1132 deletions
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 12eb8caa4263..50e071444a5c 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -140,6 +140,29 @@ static u32 hv_copyto_ringbuffer( return start_write_offset; } +/* + * + * hv_get_ringbuffer_availbytes() + * + * Get number of bytes available to read and to write to + * for the specified ring buffer + */ +static void +hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, + u32 *read, u32 *write) +{ + u32 read_loc, write_loc, dsize; + + /* Capture the read/write indices before they changed */ + read_loc = READ_ONCE(rbi->ring_buffer->read_index); + write_loc = READ_ONCE(rbi->ring_buffer->write_index); + dsize = rbi->ring_datasize; + + *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : + read_loc - write_loc; + *read = dsize - *write; +} + /* Get various debug metrics for the specified ring buffer. */ void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, struct hv_ring_buffer_debug_info *debug_info) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 7f26f5dafca7..561b05089cb6 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1029,8 +1029,7 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) EXPORT_SYMBOL(b53_vlan_filtering); int b53_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct b53_device *dev = ds->priv; @@ -1047,8 +1046,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port, EXPORT_SYMBOL(b53_vlan_prepare); void b53_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; diff --git 
a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 2af0155efce2..d954cf36ecd8 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -295,11 +295,9 @@ void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state); void b53_br_fast_age(struct dsa_switch *ds, int port); int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering); int b53_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + const struct switchdev_obj_port_vlan *vlan); void b53_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + const struct switchdev_obj_port_vlan *vlan); int b53_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); int b53_fdb_add(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index bb71d3d6f65b..7aa84ee4e771 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -174,9 +174,9 @@ static int dsa_loop_port_vlan_filtering(struct dsa_switch *ds, int port, return 0; } -static int dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) +static int +dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct dsa_loop_priv *ps = ds->priv; struct mii_bus *bus = ps->bus; @@ -193,8 +193,7 @@ static int dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port, } static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c 
index b24566bb74d2..ea59dadefb33 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1217,8 +1217,7 @@ static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port, } static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct lan9303 *chip = ds->priv; @@ -1235,8 +1234,7 @@ static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port, } static void lan9303_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct lan9303 *chip = ds->priv; diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b5be93a1e0df..663b0d5b982b 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -559,8 +559,7 @@ static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag) } static int ksz_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { /* nothing needed */ @@ -568,8 +567,7 @@ static int ksz_port_vlan_prepare(struct dsa_switch *ds, int port, } static void ksz_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct ksz_device *dev = ds->priv; u32 vlan_table[3]; @@ -858,16 +856,14 @@ exit: } static int ksz_port_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { /* nothing to do */ return 0; } static void ksz_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans 
*trans) + const struct switchdev_obj_port_mdb *mdb) { struct ksz_device *dev = ds->priv; u32 static_table[4]; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 8171055fde7a..b5e0987c88f0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1185,8 +1185,7 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, static int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct mv88e6xxx_chip *chip = ds->priv; int err; @@ -1295,8 +1294,7 @@ static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port, } static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct mv88e6xxx_chip *chip = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; @@ -3788,8 +3786,7 @@ free: } static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { /* We don't need any dynamic resource from the kernel (yet), * so skip the prepare phase. 
@@ -3799,8 +3796,7 @@ static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port, } static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct mv88e6xxx_chip *chip = ds->priv; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index b13ce5ebde8d..fe7599f404bf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1376,6 +1376,9 @@ static int bnxt_firmware_reset(struct net_device *dev, req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP; req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP; break; + case BNXT_FW_RESET_AP: + req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP; + break; default: return -EINVAL; } @@ -2522,6 +2525,14 @@ static int bnxt_reset(struct net_device *dev, u32 *flags) rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_CHIP); if (!rc) netdev_info(dev, "Reset request successful. 
Reload driver to complete reset\n"); + } else if (*flags == ETH_RESET_AP) { + /* This feature is not supported in older firmware versions */ + if (bp->hwrm_spec_code < 0x10803) + return -EOPNOTSUPP; + + rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_AP); + if (!rc) + netdev_info(dev, "Reset Application Processor request successful.\n"); } else { rc = -EINVAL; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h index ff601b42fcc8..836ef682f24c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h @@ -34,6 +34,7 @@ struct bnxt_led_cfg { #define BNXT_LED_DFLT_ENABLES(x) \ cpu_to_le32(BNXT_LED_DFLT_ENA << (BNXT_LED_DFLT_ENA_SHIFT * (x))) +#define BNXT_FW_RESET_AP 0xfffe #define BNXT_FW_RESET_CHIP 0xffff extern const struct ethtool_ops bnxt_ethtool_ops; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index c93f3a2dc6c1..3165c2ba58f9 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -164,14 +164,38 @@ #define GEM_DCFG5 0x0290 /* Design Config 5 */ #define GEM_DCFG6 0x0294 /* Design Config 6 */ #define GEM_DCFG7 0x0298 /* Design Config 7 */ +#define GEM_DCFG8 0x029C /* Design Config 8 */ #define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */ #define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */ +/* Screener Type 2 match registers */ +#define GEM_SCRT2 0x540 + +/* EtherType registers */ +#define GEM_ETHT 0x06E0 + +/* Type 2 compare registers */ +#define GEM_T2CMPW0 0x0700 +#define GEM_T2CMPW1 0x0704 +#define T2CMP_OFST(t2idx) (t2idx * 2) + +/* type 2 compare registers + * each location requires 3 compare regs + */ +#define GEM_IP4SRC_CMP(idx) (idx * 3) +#define GEM_IP4DST_CMP(idx) (idx * 3 + 1) +#define GEM_PORT_CMP(idx) (idx * 3 + 2) + +/* Which screening type 2 EtherType register will be used (0 - 7) */ +#define SCRT2_ETHT 0 + 
#define GEM_ISR(hw_q) (0x0400 + ((hw_q) << 2)) #define GEM_TBQP(hw_q) (0x0440 + ((hw_q) << 2)) #define GEM_TBQPH(hw_q) (0x04C8) #define GEM_RBQP(hw_q) (0x0480 + ((hw_q) << 2)) +#define GEM_RBQS(hw_q) (0x04A0 + ((hw_q) << 2)) +#define GEM_RBQPH(hw_q) (0x04D4) #define GEM_IER(hw_q) (0x0600 + ((hw_q) << 2)) #define GEM_IDR(hw_q) (0x0620 + ((hw_q) << 2)) #define GEM_IMR(hw_q) (0x0640 + ((hw_q) << 2)) @@ -455,6 +479,16 @@ #define GEM_DAW64_OFFSET 23 #define GEM_DAW64_SIZE 1 +/* Bitfields in DCFG8. */ +#define GEM_T1SCR_OFFSET 24 +#define GEM_T1SCR_SIZE 8 +#define GEM_T2SCR_OFFSET 16 +#define GEM_T2SCR_SIZE 8 +#define GEM_SCR2ETH_OFFSET 8 +#define GEM_SCR2ETH_SIZE 8 +#define GEM_SCR2CMP_OFFSET 0 +#define GEM_SCR2CMP_SIZE 8 + /* Bitfields in TISUBN */ #define GEM_SUBNSINCR_OFFSET 0 #define GEM_SUBNSINCR_SIZE 16 @@ -483,6 +517,66 @@ #define GEM_RXTSMODE_OFFSET 4 /* RX Descriptor Timestamp Insertion mode */ #define GEM_RXTSMODE_SIZE 2 +/* Bitfields in SCRT2 */ +#define GEM_QUEUE_OFFSET 0 /* Queue Number */ +#define GEM_QUEUE_SIZE 4 +#define GEM_VLANPR_OFFSET 4 /* VLAN Priority */ +#define GEM_VLANPR_SIZE 3 +#define GEM_VLANEN_OFFSET 8 /* VLAN Enable */ +#define GEM_VLANEN_SIZE 1 +#define GEM_ETHT2IDX_OFFSET 9 /* Index to screener type 2 EtherType register */ +#define GEM_ETHT2IDX_SIZE 3 +#define GEM_ETHTEN_OFFSET 12 /* EtherType Enable */ +#define GEM_ETHTEN_SIZE 1 +#define GEM_CMPA_OFFSET 13 /* Compare A - Index to screener type 2 Compare register */ +#define GEM_CMPA_SIZE 5 +#define GEM_CMPAEN_OFFSET 18 /* Compare A Enable */ +#define GEM_CMPAEN_SIZE 1 +#define GEM_CMPB_OFFSET 19 /* Compare B - Index to screener type 2 Compare register */ +#define GEM_CMPB_SIZE 5 +#define GEM_CMPBEN_OFFSET 24 /* Compare B Enable */ +#define GEM_CMPBEN_SIZE 1 +#define GEM_CMPC_OFFSET 25 /* Compare C - Index to screener type 2 Compare register */ +#define GEM_CMPC_SIZE 5 +#define GEM_CMPCEN_OFFSET 30 /* Compare C Enable */ +#define GEM_CMPCEN_SIZE 1 + +/* Bitfields in ETHT */ +#define 
GEM_ETHTCMP_OFFSET 0 /* EtherType compare value */ +#define GEM_ETHTCMP_SIZE 16 + +/* Bitfields in T2CMPW0 */ +#define GEM_T2CMP_OFFSET 16 /* 0xFFFF0000 compare value */ +#define GEM_T2CMP_SIZE 16 +#define GEM_T2MASK_OFFSET 0 /* 0x0000FFFF compare value or mask */ +#define GEM_T2MASK_SIZE 16 + +/* Bitfields in T2CMPW1 */ +#define GEM_T2DISMSK_OFFSET 9 /* disable mask */ +#define GEM_T2DISMSK_SIZE 1 +#define GEM_T2CMPOFST_OFFSET 7 /* compare offset */ +#define GEM_T2CMPOFST_SIZE 2 +#define GEM_T2OFST_OFFSET 0 /* offset value */ +#define GEM_T2OFST_SIZE 7 + +/* Offset for screener type 2 compare values (T2CMPOFST). + * Note the offset is applied after the specified point, + * e.g. GEM_T2COMPOFST_ETYPE denotes the EtherType field, so an offset + * of 12 bytes from this would be the source IP address in an IP header + */ +#define GEM_T2COMPOFST_SOF 0 +#define GEM_T2COMPOFST_ETYPE 1 +#define GEM_T2COMPOFST_IPHDR 2 +#define GEM_T2COMPOFST_TCPUDP 3 + +/* offset from EtherType to IP address */ +#define ETYPE_SRCIP_OFFSET 12 +#define ETYPE_DSTIP_OFFSET 16 + +/* offset from IP header to port */ +#define IPHDR_SRCPORT_OFFSET 0 +#define IPHDR_DSTPORT_OFFSET 2 + /* Transmit DMA buffer descriptor Word 1 */ #define GEM_DMA_TXVALID_OFFSET 23 /* timestamp has been captured in the Buffer Descriptor */ #define GEM_DMA_TXVALID_SIZE 1 @@ -583,6 +677,8 @@ #define gem_writel(port, reg, value) (port)->macb_reg_writel((port), GEM_##reg, (value)) #define queue_readl(queue, reg) (queue)->bp->macb_reg_readl((queue)->bp, (queue)->reg) #define queue_writel(queue, reg, value) (queue)->bp->macb_reg_writel((queue)->bp, (queue)->reg, (value)) +#define gem_readl_n(port, reg, idx) (port)->macb_reg_readl((port), GEM_##reg + idx * 4) +#define gem_writel_n(port, reg, idx, value) (port)->macb_reg_writel((port), GEM_##reg + idx * 4, (value)) #define PTP_TS_BUFFER_SIZE 128 /* must be power of 2 */ @@ -920,13 +1016,42 @@ static const struct gem_statistic gem_statistics[] = { #define GEM_STATS_LEN 
ARRAY_SIZE(gem_statistics) +#define QUEUE_STAT_TITLE(title) { \ + .stat_string = title, \ +} + +/* per queue statistics, each should be unsigned long type */ +struct queue_stats { + union { + unsigned long first; + unsigned long rx_packets; + }; + unsigned long rx_bytes; + unsigned long rx_dropped; + unsigned long tx_packets; + unsigned long tx_bytes; + unsigned long tx_dropped; +}; + +static const struct gem_statistic queue_statistics[] = { + QUEUE_STAT_TITLE("rx_packets"), + QUEUE_STAT_TITLE("rx_bytes"), + QUEUE_STAT_TITLE("rx_dropped"), + QUEUE_STAT_TITLE("tx_packets"), + QUEUE_STAT_TITLE("tx_bytes"), + QUEUE_STAT_TITLE("tx_dropped"), +}; + +#define QUEUE_STATS_LEN ARRAY_SIZE(queue_statistics) + struct macb; +struct macb_queue; struct macb_or_gem_ops { int (*mog_alloc_rx_buffers)(struct macb *bp); void (*mog_free_rx_buffers)(struct macb *bp); void (*mog_init_rings)(struct macb *bp); - int (*mog_rx)(struct macb *bp, int budget); + int (*mog_rx)(struct macb_queue *queue, int budget); }; /* MACB-PTP interface: adapt to platform needs. 
*/ @@ -968,6 +1093,9 @@ struct macb_queue { unsigned int IMR; unsigned int TBQP; unsigned int TBQPH; + unsigned int RBQS; + unsigned int RBQP; + unsigned int RBQPH; unsigned int tx_head, tx_tail; struct macb_dma_desc *tx_ring; @@ -975,6 +1103,16 @@ struct macb_queue { dma_addr_t tx_ring_dma; struct work_struct tx_error_task; + dma_addr_t rx_ring_dma; + dma_addr_t rx_buffers_dma; + unsigned int rx_tail; + unsigned int rx_prepared_head; + struct macb_dma_desc *rx_ring; + struct sk_buff **rx_skbuff; + void *rx_buffers; + struct napi_struct napi; + struct queue_stats stats; + #ifdef CONFIG_MACB_USE_HWSTAMP struct work_struct tx_ts_task; unsigned int tx_ts_head, tx_ts_tail; @@ -982,6 +1120,16 @@ struct macb_queue { #endif }; +struct ethtool_rx_fs_item { + struct ethtool_rx_flow_spec fs; + struct list_head list; +}; + +struct ethtool_rx_fs_list { + struct list_head list; + unsigned int count; +}; + struct macb { void __iomem *regs; bool native_io; @@ -990,11 +1138,6 @@ struct macb { u32 (*macb_reg_readl)(struct macb *bp, int offset); void (*macb_reg_writel)(struct macb *bp, int offset, u32 value); - unsigned int rx_tail; - unsigned int rx_prepared_head; - struct macb_dma_desc *rx_ring; - struct sk_buff **rx_skbuff; - void *rx_buffers; size_t rx_buffer_size; unsigned int rx_ring_size; @@ -1011,15 +1154,11 @@ struct macb { struct clk *tx_clk; struct clk *rx_clk; struct net_device *dev; - struct napi_struct napi; union { struct macb_stats macb; struct gem_stats gem; } hw_stats; - dma_addr_t rx_ring_dma; - dma_addr_t rx_buffers_dma; - struct macb_or_gem_ops macbgem_ops; struct mii_bus *mii_bus; @@ -1040,7 +1179,7 @@ struct macb { int skb_length; /* saved skb length for pci_unmap_single */ unsigned int max_tx_length; - u64 ethtool_stats[GEM_STATS_LEN]; + u64 ethtool_stats[GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES]; unsigned int rx_frm_len_mask; unsigned int jumbo_max_len; @@ -1057,6 +1196,11 @@ struct macb { struct ptp_clock_info ptp_clock_info; struct tsu_incr 
tsu_incr; struct hwtstamp_config tstamp_config; + + /* RX queue filer rule set*/ + struct ethtool_rx_fs_list rx_fs_list; + spinlock_t rx_fs_lock; + unsigned int max_tuples; }; #ifdef CONFIG_MACB_USE_HWSTAMP diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 72a67f74b97b..c5fa87cdc6c4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -194,17 +194,17 @@ static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index) return index & (bp->rx_ring_size - 1); } -static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index) +static struct macb_dma_desc *macb_rx_desc(struct macb_queue *queue, unsigned int index) { - index = macb_rx_ring_wrap(bp, index); - index = macb_adj_dma_desc_idx(bp, index); - return &bp->rx_ring[index]; + index = macb_rx_ring_wrap(queue->bp, index); + index = macb_adj_dma_desc_idx(queue->bp, index); + return &queue->rx_ring[index]; } -static void *macb_rx_buffer(struct macb *bp, unsigned int index) +static void *macb_rx_buffer(struct macb_queue *queue, unsigned int index) { - return bp->rx_buffers + bp->rx_buffer_size * - macb_rx_ring_wrap(bp, index); + return queue->rx_buffers + queue->bp->rx_buffer_size * + macb_rx_ring_wrap(queue->bp, index); } /* I/O accessors */ @@ -759,7 +759,9 @@ static void macb_tx_error_task(struct work_struct *work) macb_tx_ring_wrap(bp, tail), skb->data); bp->dev->stats.tx_packets++; + queue->stats.tx_packets++; bp->dev->stats.tx_bytes += skb->len; + queue->stats.tx_bytes += skb->len; } } else { /* "Buffers exhausted mid-frame" errors may only happen @@ -859,7 +861,9 @@ static void macb_tx_interrupt(struct macb_queue *queue) macb_tx_ring_wrap(bp, tail), skb->data); bp->dev->stats.tx_packets++; + queue->stats.tx_packets++; bp->dev->stats.tx_bytes += skb->len; + queue->stats.tx_bytes += skb->len; } /* Now we can safely release resources */ @@ -881,24 +885,25 @@ static void 
macb_tx_interrupt(struct macb_queue *queue) netif_wake_subqueue(bp->dev, queue_index); } -static void gem_rx_refill(struct macb *bp) +static void gem_rx_refill(struct macb_queue *queue) { unsigned int entry; struct sk_buff *skb; dma_addr_t paddr; + struct macb *bp = queue->bp; struct macb_dma_desc *desc; - while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail, - bp->rx_ring_size) > 0) { - entry = macb_rx_ring_wrap(bp, bp->rx_prepared_head); + while (CIRC_SPACE(queue->rx_prepared_head, queue->rx_tail, + bp->rx_ring_size) > 0) { + entry = macb_rx_ring_wrap(bp, queue->rx_prepared_head); /* Make hw descriptor updates visible to CPU */ rmb(); - bp->rx_prepared_head++; - desc = macb_rx_desc(bp, entry); + queue->rx_prepared_head++; + desc = macb_rx_desc(queue, entry); - if (!bp->rx_skbuff[entry]) { + if (!queue->rx_skbuff[entry]) { /* allocate sk_buff for this free entry in ring */ skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size); if (unlikely(!skb)) { @@ -916,7 +921,7 @@ static void gem_rx_refill(struct macb *bp) break; } - bp->rx_skbuff[entry] = skb; + queue->rx_skbuff[entry] = skb; if (entry == bp->rx_ring_size - 1) paddr |= MACB_BIT(RX_WRAP); @@ -934,18 +939,18 @@ static void gem_rx_refill(struct macb *bp) /* Make descriptor updates visible to hardware */ wmb(); - netdev_vdbg(bp->dev, "rx ring: prepared head %d, tail %d\n", - bp->rx_prepared_head, bp->rx_tail); + netdev_vdbg(bp->dev, "rx ring: queue: %p, prepared head %d, tail %d\n", + queue, queue->rx_prepared_head, queue->rx_tail); } /* Mark DMA descriptors from begin up to and not including end as unused */ -static void discard_partial_frame(struct macb *bp, unsigned int begin, +static void discard_partial_frame(struct macb_queue *queue, unsigned int begin, unsigned int end) { unsigned int frag; for (frag = begin; frag != end; frag++) { - struct macb_dma_desc *desc = macb_rx_desc(bp, frag); + struct macb_dma_desc *desc = macb_rx_desc(queue, frag); desc->addr &= ~MACB_BIT(RX_USED); } @@ -959,8 +964,9 @@ static void 
discard_partial_frame(struct macb *bp, unsigned int begin, */ } -static int gem_rx(struct macb *bp, int budget) +static int gem_rx(struct macb_queue *queue, int budget) { + struct macb *bp = queue->bp; unsigned int len; unsigned int entry; struct sk_buff *skb; @@ -972,8 +978,8 @@ static int gem_rx(struct macb *bp, int budget) dma_addr_t addr; bool rxused; - entry = macb_rx_ring_wrap(bp, bp->rx_tail); - desc = macb_rx_desc(bp, entry); + entry = macb_rx_ring_wrap(bp, queue->rx_tail); + desc = macb_rx_desc(queue, entry); /* Make hw descriptor updates visible to CPU */ rmb(); @@ -985,24 +991,26 @@ static int gem_rx(struct macb *bp, int budget) if (!rxused) break; - bp->rx_tail++; + queue->rx_tail++; count++; if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) { netdev_err(bp->dev, "not whole frame pointed by descriptor\n"); bp->dev->stats.rx_dropped++; + queue->stats.rx_dropped++; break; } - skb = bp->rx_skbuff[entry]; + skb = queue->rx_skbuff[entry]; if (unlikely(!skb)) { netdev_err(bp->dev, "inconsistent Rx descriptor chain\n"); bp->dev->stats.rx_dropped++; + queue->stats.rx_dropped++; break; } /* now everything is ready for receiving packet */ - bp->rx_skbuff[entry] = NULL; + queue->rx_skbuff[entry] = NULL; len = ctrl & bp->rx_frm_len_mask; netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len); @@ -1019,7 +1027,9 @@ static int gem_rx(struct macb *bp, int budget) skb->ip_summed = CHECKSUM_UNNECESSARY; bp->dev->stats.rx_packets++; + queue->stats.rx_packets++; bp->dev->stats.rx_bytes += skb->len; + queue->stats.rx_bytes += skb->len; gem_ptp_do_rxstamp(bp, skb, desc); @@ -1035,12 +1045,12 @@ static int gem_rx(struct macb *bp, int budget) netif_receive_skb(skb); } - gem_rx_refill(bp); + gem_rx_refill(queue); return count; } -static int macb_rx_frame(struct macb *bp, unsigned int first_frag, +static int macb_rx_frame(struct macb_queue *queue, unsigned int first_frag, unsigned int last_frag) { unsigned int len; @@ -1048,8 +1058,9 @@ static int 
macb_rx_frame(struct macb *bp, unsigned int first_frag, unsigned int offset; struct sk_buff *skb; struct macb_dma_desc *desc; + struct macb *bp = queue->bp; - desc = macb_rx_desc(bp, last_frag); + desc = macb_rx_desc(queue, last_frag); len = desc->ctrl & bp->rx_frm_len_mask; netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n", @@ -1068,7 +1079,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, if (!skb) { bp->dev->stats.rx_dropped++; for (frag = first_frag; ; frag++) { - desc = macb_rx_desc(bp, frag); + desc = macb_rx_desc(queue, frag); desc->addr &= ~MACB_BIT(RX_USED); if (frag == last_frag) break; @@ -1096,10 +1107,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, frag_len = len - offset; } skb_copy_to_linear_data_offset(skb, offset, - macb_rx_buffer(bp, frag), + macb_rx_buffer(queue, frag), frag_len); offset += bp->rx_buffer_size; - desc = macb_rx_desc(bp, frag); + desc = macb_rx_desc(queue, frag); desc->addr &= ~MACB_BIT(RX_USED); if (frag == last_frag) @@ -1121,32 +1132,34 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, return 0; } -static inline void macb_init_rx_ring(struct macb *bp) +static inline void macb_init_rx_ring(struct macb_queue *queue) { + struct macb *bp = queue->bp; dma_addr_t addr; struct macb_dma_desc *desc = NULL; int i; - addr = bp->rx_buffers_dma; + addr = queue->rx_buffers_dma; for (i = 0; i < bp->rx_ring_size; i++) { - desc = macb_rx_desc(bp, i); + desc = macb_rx_desc(queue, i); macb_set_addr(bp, desc, addr); desc->ctrl = 0; addr += bp->rx_buffer_size; } desc->addr |= MACB_BIT(RX_WRAP); - bp->rx_tail = 0; + queue->rx_tail = 0; } -static int macb_rx(struct macb *bp, int budget) +static int macb_rx(struct macb_queue *queue, int budget) { + struct macb *bp = queue->bp; bool reset_rx_queue = false; int received = 0; unsigned int tail; int first_frag = -1; - for (tail = bp->rx_tail; budget > 0; tail++) { - struct macb_dma_desc *desc = macb_rx_desc(bp, tail); + for 
(tail = queue->rx_tail; budget > 0; tail++) { + struct macb_dma_desc *desc = macb_rx_desc(queue, tail); u32 ctrl; /* Make hw descriptor updates visible to CPU */ @@ -1159,7 +1172,7 @@ static int macb_rx(struct macb *bp, int budget) if (ctrl & MACB_BIT(RX_SOF)) { if (first_frag != -1) - discard_partial_frame(bp, first_frag, tail); + discard_partial_frame(queue, first_frag, tail); first_frag = tail; } @@ -1171,7 +1184,7 @@ static int macb_rx(struct macb *bp, int budget) continue; } - dropped = macb_rx_frame(bp, first_frag, tail); + dropped = macb_rx_frame(queue, first_frag, tail); first_frag = -1; if (unlikely(dropped < 0)) { reset_rx_queue = true; @@ -1195,8 +1208,8 @@ static int macb_rx(struct macb *bp, int budget) ctrl = macb_readl(bp, NCR); macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE)); - macb_init_rx_ring(bp); - macb_writel(bp, RBQP, bp->rx_ring_dma); + macb_init_rx_ring(queue); + queue_writel(queue, RBQP, queue->rx_ring_dma); macb_writel(bp, NCR, ctrl | MACB_BIT(RE)); @@ -1205,16 +1218,17 @@ static int macb_rx(struct macb *bp, int budget) } if (first_frag != -1) - bp->rx_tail = first_frag; + queue->rx_tail = first_frag; else - bp->rx_tail = tail; + queue->rx_tail = tail; return received; } static int macb_poll(struct napi_struct *napi, int budget) { - struct macb *bp = container_of(napi, struct macb, napi); + struct macb_queue *queue = container_of(napi, struct macb_queue, napi); + struct macb *bp = queue->bp; int work_done; u32 status; @@ -1224,7 +1238,7 @@ static int macb_poll(struct napi_struct *napi, int budget) netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n", (unsigned long)status, budget); - work_done = bp->macbgem_ops.mog_rx(bp, budget); + work_done = bp->macbgem_ops.mog_rx(queue, budget); if (work_done < budget) { napi_complete_done(napi, work_done); @@ -1232,10 +1246,10 @@ static int macb_poll(struct napi_struct *napi, int budget) status = macb_readl(bp, RSR); if (status) { if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - macb_writel(bp, ISR, 
MACB_BIT(RCOMP)); + queue_writel(queue, ISR, MACB_BIT(RCOMP)); napi_reschedule(napi); } else { - macb_writel(bp, IER, MACB_RX_INT_FLAGS); + queue_writel(queue, IER, MACB_RX_INT_FLAGS); } } @@ -1282,9 +1296,9 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(RCOMP)); - if (napi_schedule_prep(&bp->napi)) { + if (napi_schedule_prep(&queue->napi)) { netdev_vdbg(bp->dev, "scheduling RX softirq\n"); - __napi_schedule(&bp->napi); + __napi_schedule(&queue->napi); } } @@ -1708,38 +1722,44 @@ static void gem_free_rx_buffers(struct macb *bp) { struct sk_buff *skb; struct macb_dma_desc *desc; + struct macb_queue *queue; dma_addr_t addr; + unsigned int q; int i; - if (!bp->rx_skbuff) - return; + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + if (!queue->rx_skbuff) + continue; - for (i = 0; i < bp->rx_ring_size; i++) { - skb = bp->rx_skbuff[i]; + for (i = 0; i < bp->rx_ring_size; i++) { + skb = queue->rx_skbuff[i]; - if (!skb) - continue; + if (!skb) + continue; - desc = macb_rx_desc(bp, i); - addr = macb_get_addr(bp, desc); + desc = macb_rx_desc(queue, i); + addr = macb_get_addr(bp, desc); - dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - skb = NULL; - } + dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, + DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + skb = NULL; + } - kfree(bp->rx_skbuff); - bp->rx_skbuff = NULL; + kfree(queue->rx_skbuff); + queue->rx_skbuff = NULL; + } } static void macb_free_rx_buffers(struct macb *bp) { - if (bp->rx_buffers) { + struct macb_queue *queue = &bp->queues[0]; + + if (queue->rx_buffers) { dma_free_coherent(&bp->pdev->dev, bp->rx_ring_size * bp->rx_buffer_size, - bp->rx_buffers, bp->rx_buffers_dma); - bp->rx_buffers = NULL; + queue->rx_buffers, queue->rx_buffers_dma); + queue->rx_buffers = NULL; } } @@ -1748,11 +1768,12 @@ static void macb_free_consistent(struct macb 
*bp) struct macb_queue *queue; unsigned int q; + queue = &bp->queues[0]; bp->macbgem_ops.mog_free_rx_buffers(bp); - if (bp->rx_ring) { + if (queue->rx_ring) { dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES(bp), - bp->rx_ring, bp->rx_ring_dma); - bp->rx_ring = NULL; + queue->rx_ring, queue->rx_ring_dma); + queue->rx_ring = NULL; } for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { @@ -1768,32 +1789,37 @@ static void macb_free_consistent(struct macb *bp) static int gem_alloc_rx_buffers(struct macb *bp) { + struct macb_queue *queue; + unsigned int q; int size; - size = bp->rx_ring_size * sizeof(struct sk_buff *); - bp->rx_skbuff = kzalloc(size, GFP_KERNEL); - if (!bp->rx_skbuff) - return -ENOMEM; - else - netdev_dbg(bp->dev, - "Allocated %d RX struct sk_buff entries at %p\n", - bp->rx_ring_size, bp->rx_skbuff); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + size = bp->rx_ring_size * sizeof(struct sk_buff *); + queue->rx_skbuff = kzalloc(size, GFP_KERNEL); + if (!queue->rx_skbuff) + return -ENOMEM; + else + netdev_dbg(bp->dev, + "Allocated %d RX struct sk_buff entries at %p\n", + bp->rx_ring_size, queue->rx_skbuff); + } return 0; } static int macb_alloc_rx_buffers(struct macb *bp) { + struct macb_queue *queue = &bp->queues[0]; int size; size = bp->rx_ring_size * bp->rx_buffer_size; - bp->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size, - &bp->rx_buffers_dma, GFP_KERNEL); - if (!bp->rx_buffers) + queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size, + &queue->rx_buffers_dma, GFP_KERNEL); + if (!queue->rx_buffers) return -ENOMEM; netdev_dbg(bp->dev, "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n", - size, (unsigned long)bp->rx_buffers_dma, bp->rx_buffers); + size, (unsigned long)queue->rx_buffers_dma, queue->rx_buffers); return 0; } @@ -1819,17 +1845,16 @@ static int macb_alloc_consistent(struct macb *bp) queue->tx_skb = kmalloc(size, GFP_KERNEL); if (!queue->tx_skb) goto out_err; - } - - size = 
RX_RING_BYTES(bp); - bp->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size, - &bp->rx_ring_dma, GFP_KERNEL); - if (!bp->rx_ring) - goto out_err; - netdev_dbg(bp->dev, - "Allocated RX ring of %d bytes at %08lx (mapped %p)\n", - size, (unsigned long)bp->rx_ring_dma, bp->rx_ring); + size = RX_RING_BYTES(bp); + queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size, + &queue->rx_ring_dma, GFP_KERNEL); + if (!queue->rx_ring) + goto out_err; + netdev_dbg(bp->dev, + "Allocated RX ring of %d bytes at %08lx (mapped %p)\n", + size, (unsigned long)queue->rx_ring_dma, queue->rx_ring); + } if (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) goto out_err; @@ -1856,12 +1881,13 @@ static void gem_init_rings(struct macb *bp) desc->ctrl |= MACB_BIT(TX_WRAP); queue->tx_head = 0; queue->tx_tail = 0; - } - bp->rx_tail = 0; - bp->rx_prepared_head = 0; + queue->rx_tail = 0; + queue->rx_prepared_head = 0; + + gem_rx_refill(queue); + } - gem_rx_refill(bp); } static void macb_init_rings(struct macb *bp) @@ -1869,7 +1895,7 @@ static void macb_init_rings(struct macb *bp) int i; struct macb_dma_desc *desc = NULL; - macb_init_rx_ring(bp); + macb_init_rx_ring(&bp->queues[0]); for (i = 0; i < bp->tx_ring_size; i++) { desc = macb_tx_desc(&bp->queues[0], i); @@ -1978,11 +2004,20 @@ static u32 macb_dbw(struct macb *bp) */ static void macb_configure_dma(struct macb *bp) { + struct macb_queue *queue; + u32 buffer_size; + unsigned int q; u32 dmacfg; + buffer_size = bp->rx_buffer_size / RX_BUFFER_MULTIPLE; if (macb_is_gem(bp)) { dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L); - dmacfg |= GEM_BF(RXBS, bp->rx_buffer_size / RX_BUFFER_MULTIPLE); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + if (q) + queue_writel(queue, RBQS, buffer_size); + else + dmacfg |= GEM_BF(RXBS, buffer_size); + } if (bp->dma_burst_length) dmacfg = GEM_BFINS(FBLDO, bp->dma_burst_length, dmacfg); dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L); @@ -2051,12 +2086,12 @@ static void macb_init_hw(struct macb *bp) 
macb_configure_dma(bp); /* Initialize TX and RX buffers */ - macb_writel(bp, RBQP, lower_32_bits(bp->rx_ring_dma)); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) - macb_writel(bp, RBQPH, upper_32_bits(bp->rx_ring_dma)); + if (bp->hw_dma_cap & HW_DMA_CAP_64B) + queue_writel(queue, RBQPH, upper_32_bits(queue->rx_ring_dma)); #endif - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (bp->hw_dma_cap & HW_DMA_CAP_64B) @@ -2197,6 +2232,8 @@ static int macb_open(struct net_device *dev) { struct macb *bp = netdev_priv(dev); size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN; + struct macb_queue *queue; + unsigned int q; int err; netdev_dbg(bp->dev, "open\n"); @@ -2218,11 +2255,12 @@ static int macb_open(struct net_device *dev) return err; } - napi_enable(&bp->napi); - bp->macbgem_ops.mog_init_rings(bp); macb_init_hw(bp); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + napi_enable(&queue->napi); + /* schedule a link state check */ phy_start(dev->phydev); @@ -2237,10 +2275,14 @@ static int macb_open(struct net_device *dev) static int macb_close(struct net_device *dev) { struct macb *bp = netdev_priv(dev); + struct macb_queue *queue; unsigned long flags; + unsigned int q; netif_tx_stop_all_queues(dev); - napi_disable(&bp->napi); + + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + napi_disable(&queue->napi); if (dev->phydev) phy_stop(dev->phydev); @@ -2270,7 +2312,10 @@ static int macb_change_mtu(struct net_device *dev, int new_mtu) static void gem_update_stats(struct macb *bp) { - unsigned int i; + struct macb_queue *queue; + unsigned int i, q, idx; + unsigned long *stat; + u32 *p = &bp->hw_stats.gem.tx_octets_31_0; for (i = 0; i < 
GEM_STATS_LEN; ++i, ++p) { @@ -2287,6 +2332,11 @@ static void gem_update_stats(struct macb *bp) *(++p) += val; } } + + idx = GEM_STATS_LEN; + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + for (i = 0, stat = &queue->stats.first; i < QUEUE_STATS_LEN; ++i, ++stat) + bp->ethtool_stats[idx++] = *stat; } static struct net_device_stats *gem_get_stats(struct macb *bp) @@ -2334,14 +2384,17 @@ static void gem_get_ethtool_stats(struct net_device *dev, bp = netdev_priv(dev); gem_update_stats(bp); - memcpy(data, &bp->ethtool_stats, sizeof(u64) * GEM_STATS_LEN); + memcpy(data, &bp->ethtool_stats, sizeof(u64) + * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES)); } static int gem_get_sset_count(struct net_device *dev, int sset) { + struct macb *bp = netdev_priv(dev); + switch (sset) { case ETH_SS_STATS: - return GEM_STATS_LEN; + return GEM_STATS_LEN + bp->num_queues * QUEUE_STATS_LEN; default: return -EOPNOTSUPP; } @@ -2349,13 +2402,25 @@ static int gem_get_sset_count(struct net_device *dev, int sset) static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p) { + char stat_string[ETH_GSTRING_LEN]; + struct macb *bp = netdev_priv(dev); + struct macb_queue *queue; unsigned int i; + unsigned int q; switch (sset) { case ETH_SS_STATS: for (i = 0; i < GEM_STATS_LEN; i++, p += ETH_GSTRING_LEN) memcpy(p, gem_statistics[i].stat_string, ETH_GSTRING_LEN); + + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + for (i = 0; i < QUEUE_STATS_LEN; i++, p += ETH_GSTRING_LEN) { + snprintf(stat_string, ETH_GSTRING_LEN, "q%d_%s", + q, queue_statistics[i].stat_string); + memcpy(p, stat_string, ETH_GSTRING_LEN); + } + } break; } } @@ -2603,6 +2668,308 @@ static int macb_get_ts_info(struct net_device *netdev, return ethtool_op_get_ts_info(netdev, info); } +static void gem_enable_flow_filters(struct macb *bp, bool enable) +{ + struct ethtool_rx_fs_item *item; + u32 t2_scr; + int num_t2_scr; + + num_t2_scr = GEM_BFEXT(T2SCR, gem_readl(bp, 
DCFG8)); + + list_for_each_entry(item, &bp->rx_fs_list.list, list) { + struct ethtool_rx_flow_spec *fs = &item->fs; + struct ethtool_tcpip4_spec *tp4sp_m; + + if (fs->location >= num_t2_scr) + continue; + + t2_scr = gem_readl_n(bp, SCRT2, fs->location); + + /* enable/disable screener regs for the flow entry */ + t2_scr = GEM_BFINS(ETHTEN, enable, t2_scr); + + /* only enable fields with no masking */ + tp4sp_m = &(fs->m_u.tcp_ip4_spec); + + if (enable && (tp4sp_m->ip4src == 0xFFFFFFFF)) + t2_scr = GEM_BFINS(CMPAEN, 1, t2_scr); + else + t2_scr = GEM_BFINS(CMPAEN, 0, t2_scr); + + if (enable && (tp4sp_m->ip4dst == 0xFFFFFFFF)) + t2_scr = GEM_BFINS(CMPBEN, 1, t2_scr); + else + t2_scr = GEM_BFINS(CMPBEN, 0, t2_scr); + + if (enable && ((tp4sp_m->psrc == 0xFFFF) || (tp4sp_m->pdst == 0xFFFF))) + t2_scr = GEM_BFINS(CMPCEN, 1, t2_scr); + else + t2_scr = GEM_BFINS(CMPCEN, 0, t2_scr); + + gem_writel_n(bp, SCRT2, fs->location, t2_scr); + } +} + +static void gem_prog_cmp_regs(struct macb *bp, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *tp4sp_v, *tp4sp_m; + uint16_t index = fs->location; + u32 w0, w1, t2_scr; + bool cmp_a = false; + bool cmp_b = false; + bool cmp_c = false; + + tp4sp_v = &(fs->h_u.tcp_ip4_spec); + tp4sp_m = &(fs->m_u.tcp_ip4_spec); + + /* ignore field if any masking set */ + if (tp4sp_m->ip4src == 0xFFFFFFFF) { + /* 1st compare reg - IP source address */ + w0 = 0; + w1 = 0; + w0 = tp4sp_v->ip4src; + w1 = GEM_BFINS(T2DISMSK, 1, w1); /* 32-bit compare */ + w1 = GEM_BFINS(T2CMPOFST, GEM_T2COMPOFST_ETYPE, w1); + w1 = GEM_BFINS(T2OFST, ETYPE_SRCIP_OFFSET, w1); + gem_writel_n(bp, T2CMPW0, T2CMP_OFST(GEM_IP4SRC_CMP(index)), w0); + gem_writel_n(bp, T2CMPW1, T2CMP_OFST(GEM_IP4SRC_CMP(index)), w1); + cmp_a = true; + } + + /* ignore field if any masking set */ + if (tp4sp_m->ip4dst == 0xFFFFFFFF) { + /* 2nd compare reg - IP destination address */ + w0 = 0; + w1 = 0; + w0 = tp4sp_v->ip4dst; + w1 = GEM_BFINS(T2DISMSK, 1, w1); /* 32-bit compare */ + w1 = 
GEM_BFINS(T2CMPOFST, GEM_T2COMPOFST_ETYPE, w1); + w1 = GEM_BFINS(T2OFST, ETYPE_DSTIP_OFFSET, w1); + gem_writel_n(bp, T2CMPW0, T2CMP_OFST(GEM_IP4DST_CMP(index)), w0); + gem_writel_n(bp, T2CMPW1, T2CMP_OFST(GEM_IP4DST_CMP(index)), w1); + cmp_b = true; + } + + /* ignore both port fields if masking set in both */ + if ((tp4sp_m->psrc == 0xFFFF) || (tp4sp_m->pdst == 0xFFFF)) { + /* 3rd compare reg - source port, destination port */ + w0 = 0; + w1 = 0; + w1 = GEM_BFINS(T2CMPOFST, GEM_T2COMPOFST_IPHDR, w1); + if (tp4sp_m->psrc == tp4sp_m->pdst) { + w0 = GEM_BFINS(T2MASK, tp4sp_v->psrc, w0); + w0 = GEM_BFINS(T2CMP, tp4sp_v->pdst, w0); + w1 = GEM_BFINS(T2DISMSK, 1, w1); /* 32-bit compare */ + w1 = GEM_BFINS(T2OFST, IPHDR_SRCPORT_OFFSET, w1); + } else { + /* only one port definition */ + w1 = GEM_BFINS(T2DISMSK, 0, w1); /* 16-bit compare */ + w0 = GEM_BFINS(T2MASK, 0xFFFF, w0); + if (tp4sp_m->psrc == 0xFFFF) { /* src port */ + w0 = GEM_BFINS(T2CMP, tp4sp_v->psrc, w0); + w1 = GEM_BFINS(T2OFST, IPHDR_SRCPORT_OFFSET, w1); + } else { /* dst port */ + w0 = GEM_BFINS(T2CMP, tp4sp_v->pdst, w0); + w1 = GEM_BFINS(T2OFST, IPHDR_DSTPORT_OFFSET, w1); + } + } + gem_writel_n(bp, T2CMPW0, T2CMP_OFST(GEM_PORT_CMP(index)), w0); + gem_writel_n(bp, T2CMPW1, T2CMP_OFST(GEM_PORT_CMP(index)), w1); + cmp_c = true; + } + + t2_scr = 0; + t2_scr = GEM_BFINS(QUEUE, (fs->ring_cookie) & 0xFF, t2_scr); + t2_scr = GEM_BFINS(ETHT2IDX, SCRT2_ETHT, t2_scr); + if (cmp_a) + t2_scr = GEM_BFINS(CMPA, GEM_IP4SRC_CMP(index), t2_scr); + if (cmp_b) + t2_scr = GEM_BFINS(CMPB, GEM_IP4DST_CMP(index), t2_scr); + if (cmp_c) + t2_scr = GEM_BFINS(CMPC, GEM_PORT_CMP(index), t2_scr); + gem_writel_n(bp, SCRT2, index, t2_scr); +} + +static int gem_add_flow_filter(struct net_device *netdev, + struct ethtool_rxnfc *cmd) +{ + struct macb *bp = netdev_priv(netdev); + struct ethtool_rx_flow_spec *fs = &cmd->fs; + struct ethtool_rx_fs_item *item, *newfs; + int ret = -EINVAL; + bool added = false; + + newfs = kmalloc(sizeof(*newfs), 
GFP_KERNEL); + if (newfs == NULL) + return -ENOMEM; + memcpy(&newfs->fs, fs, sizeof(newfs->fs)); + + netdev_dbg(netdev, + "Adding flow filter entry,type=%u,queue=%u,loc=%u,src=%08X,dst=%08X,ps=%u,pd=%u\n", + fs->flow_type, (int)fs->ring_cookie, fs->location, + htonl(fs->h_u.tcp_ip4_spec.ip4src), + htonl(fs->h_u.tcp_ip4_spec.ip4dst), + htons(fs->h_u.tcp_ip4_spec.psrc), htons(fs->h_u.tcp_ip4_spec.pdst)); + + /* find correct place to add in list */ + if (list_empty(&bp->rx_fs_list.list)) + list_add(&newfs->list, &bp->rx_fs_list.list); + else { + list_for_each_entry(item, &bp->rx_fs_list.list, list) { + if (item->fs.location > newfs->fs.location) { + list_add_tail(&newfs->list, &item->list); + added = true; + break; + } else if (item->fs.location == fs->location) { + netdev_err(netdev, "Rule not added: location %d not free!\n", + fs->location); + ret = -EBUSY; + goto err; + } + } + if (!added) + list_add_tail(&newfs->list, &bp->rx_fs_list.list); + } + + gem_prog_cmp_regs(bp, fs); + bp->rx_fs_list.count++; + /* enable filtering if NTUPLE on */ + if (netdev->features & NETIF_F_NTUPLE) + gem_enable_flow_filters(bp, 1); + + return 0; + +err: + kfree(newfs); + return ret; +} + +static int gem_del_flow_filter(struct net_device *netdev, + struct ethtool_rxnfc *cmd) +{ + struct macb *bp = netdev_priv(netdev); + struct ethtool_rx_fs_item *item; + struct ethtool_rx_flow_spec *fs; + + if (list_empty(&bp->rx_fs_list.list)) + return -EINVAL; + + list_for_each_entry(item, &bp->rx_fs_list.list, list) { + if (item->fs.location == cmd->fs.location) { + /* disable screener regs for the flow entry */ + fs = &(item->fs); + netdev_dbg(netdev, + "Deleting flow filter entry,type=%u,queue=%u,loc=%u,src=%08X,dst=%08X,ps=%u,pd=%u\n", + fs->flow_type, (int)fs->ring_cookie, fs->location, + htonl(fs->h_u.tcp_ip4_spec.ip4src), + htonl(fs->h_u.tcp_ip4_spec.ip4dst), + htons(fs->h_u.tcp_ip4_spec.psrc), + htons(fs->h_u.tcp_ip4_spec.pdst)); + + gem_writel_n(bp, SCRT2, fs->location, 0); + + 
list_del(&item->list); + kfree(item); + bp->rx_fs_list.count--; + return 0; + } + } + + return -EINVAL; +} + +static int gem_get_flow_entry(struct net_device *netdev, + struct ethtool_rxnfc *cmd) +{ + struct macb *bp = netdev_priv(netdev); + struct ethtool_rx_fs_item *item; + + list_for_each_entry(item, &bp->rx_fs_list.list, list) { + if (item->fs.location == cmd->fs.location) { + memcpy(&cmd->fs, &item->fs, sizeof(cmd->fs)); + return 0; + } + } + return -EINVAL; +} + +static int gem_get_all_flow_entries(struct net_device *netdev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct macb *bp = netdev_priv(netdev); + struct ethtool_rx_fs_item *item; + uint32_t cnt = 0; + + list_for_each_entry(item, &bp->rx_fs_list.list, list) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + rule_locs[cnt] = item->fs.location; + cnt++; + } + cmd->data = bp->max_tuples; + cmd->rule_cnt = cnt; + + return 0; +} + +static int gem_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct macb *bp = netdev_priv(netdev); + int ret = 0; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = bp->num_queues; + break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = bp->rx_fs_list.count; + break; + case ETHTOOL_GRXCLSRULE: + ret = gem_get_flow_entry(netdev, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + ret = gem_get_all_flow_entries(netdev, cmd, rule_locs); + break; + default: + netdev_err(netdev, + "Command parameter %d is not supported\n", cmd->cmd); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct macb *bp = netdev_priv(netdev); + unsigned long flags; + int ret; + + spin_lock_irqsave(&bp->rx_fs_lock, flags); + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + if ((cmd->fs.location >= bp->max_tuples) + || (cmd->fs.ring_cookie >= bp->num_queues)) { + ret = -EINVAL; + break; + } + ret = gem_add_flow_filter(netdev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + 
ret = gem_del_flow_filter(netdev, cmd); + break; + default: + netdev_err(netdev, + "Command parameter %d is not supported\n", cmd->cmd); + ret = -EOPNOTSUPP; + } + + spin_unlock_irqrestore(&bp->rx_fs_lock, flags); + return ret; +} + static const struct ethtool_ops macb_ethtool_ops = { .get_regs_len = macb_get_regs_len, .get_regs = macb_get_regs, @@ -2628,6 +2995,8 @@ static const struct ethtool_ops gem_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_ringparam = macb_get_ringparam, .set_ringparam = macb_set_ringparam, + .get_rxnfc = gem_get_rxnfc, + .set_rxnfc = gem_set_rxnfc, }; static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) @@ -2685,6 +3054,12 @@ static int macb_set_features(struct net_device *netdev, gem_writel(bp, NCFGR, netcfg); } + /* RX Flow Filters */ + if ((changed & NETIF_F_NTUPLE) && macb_is_gem(bp)) { + bool turn_on = features & NETIF_F_NTUPLE; + + gem_enable_flow_filters(bp, turn_on); + } return 0; } @@ -2850,7 +3225,7 @@ static int macb_init(struct platform_device *pdev) struct macb *bp = netdev_priv(dev); struct macb_queue *queue; int err; - u32 val; + u32 val, reg; bp->tx_ring_size = DEFAULT_TX_RING_SIZE; bp->rx_ring_size = DEFAULT_RX_RING_SIZE; @@ -2865,15 +3240,20 @@ static int macb_init(struct platform_device *pdev) queue = &bp->queues[q]; queue->bp = bp; + netif_napi_add(dev, &queue->napi, macb_poll, 64); if (hw_q) { queue->ISR = GEM_ISR(hw_q - 1); queue->IER = GEM_IER(hw_q - 1); queue->IDR = GEM_IDR(hw_q - 1); queue->IMR = GEM_IMR(hw_q - 1); queue->TBQP = GEM_TBQP(hw_q - 1); + queue->RBQP = GEM_RBQP(hw_q - 1); + queue->RBQS = GEM_RBQS(hw_q - 1); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) + if (bp->hw_dma_cap & HW_DMA_CAP_64B) { queue->TBQPH = GEM_TBQPH(hw_q - 1); + queue->RBQPH = GEM_RBQPH(hw_q - 1); + } #endif } else { /* queue0 uses legacy registers */ @@ -2882,9 +3262,12 @@ static int macb_init(struct platform_device *pdev) queue->IDR = MACB_IDR; queue->IMR = 
MACB_IMR; queue->TBQP = MACB_TBQP; + queue->RBQP = MACB_RBQP; #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) + if (bp->hw_dma_cap & HW_DMA_CAP_64B) { queue->TBQPH = MACB_TBQPH; + queue->RBQPH = MACB_RBQPH; + } #endif } @@ -2908,7 +3291,6 @@ static int macb_init(struct platform_device *pdev) } dev->netdev_ops = &macb_netdev_ops; - netif_napi_add(dev, &bp->napi, macb_poll, 64); /* setup appropriated routines according to adapter type */ if (macb_is_gem(bp)) { @@ -2941,6 +3323,30 @@ static int macb_init(struct platform_device *pdev) dev->hw_features &= ~NETIF_F_SG; dev->features = dev->hw_features; + /* Check RX Flow Filters support. + * Max Rx flows set by availability of screeners & compare regs: + * each 4-tuple define requires 1 T2 screener reg + 3 compare regs + */ + reg = gem_readl(bp, DCFG8); + bp->max_tuples = min((GEM_BFEXT(SCR2CMP, reg) / 3), + GEM_BFEXT(T2SCR, reg)); + if (bp->max_tuples > 0) { + /* also needs one ethtype match to check IPv4 */ + if (GEM_BFEXT(SCR2ETH, reg) > 0) { + /* program this reg now */ + reg = 0; + reg = GEM_BFINS(ETHTCMP, (uint16_t)ETH_P_IP, reg); + gem_writel_n(bp, ETHT, SCRT2_ETHT, reg); + /* Filtering is supported in hw but don't enable it in kernel now */ + dev->hw_features |= NETIF_F_NTUPLE; + /* init Rx flow definitions */ + INIT_LIST_HEAD(&bp->rx_fs_list.list); + bp->rx_fs_list.count = 0; + spin_lock_init(&bp->rx_fs_lock); + } else + bp->max_tuples = 0; + } + if (!(bp->caps & MACB_CAPS_USRIO_DISABLED)) { val = 0; if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII) @@ -2977,34 +3383,35 @@ static int macb_init(struct platform_device *pdev) static int at91ether_start(struct net_device *dev) { struct macb *lp = netdev_priv(dev); + struct macb_queue *q = &lp->queues[0]; struct macb_dma_desc *desc; dma_addr_t addr; u32 ctl; int i; - lp->rx_ring = dma_alloc_coherent(&lp->pdev->dev, + q->rx_ring = dma_alloc_coherent(&lp->pdev->dev, (AT91ETHER_MAX_RX_DESCR * macb_dma_desc_get_size(lp)), - &lp->rx_ring_dma, 
GFP_KERNEL); - if (!lp->rx_ring) + &q->rx_ring_dma, GFP_KERNEL); + if (!q->rx_ring) return -ENOMEM; - lp->rx_buffers = dma_alloc_coherent(&lp->pdev->dev, + q->rx_buffers = dma_alloc_coherent(&lp->pdev->dev, AT91ETHER_MAX_RX_DESCR * AT91ETHER_MAX_RBUFF_SZ, - &lp->rx_buffers_dma, GFP_KERNEL); - if (!lp->rx_buffers) { + &q->rx_buffers_dma, GFP_KERNEL); + if (!q->rx_buffers) { dma_free_coherent(&lp->pdev->dev, AT91ETHER_MAX_RX_DESCR * macb_dma_desc_get_size(lp), - lp->rx_ring, lp->rx_ring_dma); - lp->rx_ring = NULL; + q->rx_ring, q->rx_ring_dma); + q->rx_ring = NULL; return -ENOMEM; } - addr = lp->rx_buffers_dma; + addr = q->rx_buffers_dma; for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) { - desc = macb_rx_desc(lp, i); + desc = macb_rx_desc(q, i); macb_set_addr(lp, desc, addr); desc->ctrl = 0; addr += AT91ETHER_MAX_RBUFF_SZ; @@ -3014,10 +3421,10 @@ static int at91ether_start(struct net_device *dev) desc->addr |= MACB_BIT(RX_WRAP); /* Reset buffer index */ - lp->rx_tail = 0; + q->rx_tail = 0; /* Program address of descriptor list in Rx Buffer Queue register */ - macb_writel(lp, RBQP, lp->rx_ring_dma); + macb_writel(lp, RBQP, q->rx_ring_dma); /* Enable Receive and Transmit */ ctl = macb_readl(lp, NCR); @@ -3064,6 +3471,7 @@ static int at91ether_open(struct net_device *dev) static int at91ether_close(struct net_device *dev) { struct macb *lp = netdev_priv(dev); + struct macb_queue *q = &lp->queues[0]; u32 ctl; /* Disable Receiver and Transmitter */ @@ -3084,13 +3492,13 @@ static int at91ether_close(struct net_device *dev) dma_free_coherent(&lp->pdev->dev, AT91ETHER_MAX_RX_DESCR * macb_dma_desc_get_size(lp), - lp->rx_ring, lp->rx_ring_dma); - lp->rx_ring = NULL; + q->rx_ring, q->rx_ring_dma); + q->rx_ring = NULL; dma_free_coherent(&lp->pdev->dev, AT91ETHER_MAX_RX_DESCR * AT91ETHER_MAX_RBUFF_SZ, - lp->rx_buffers, lp->rx_buffers_dma); - lp->rx_buffers = NULL; + q->rx_buffers, q->rx_buffers_dma); + q->rx_buffers = NULL; return 0; } @@ -3134,14 +3542,15 @@ static int 
at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev) static void at91ether_rx(struct net_device *dev) { struct macb *lp = netdev_priv(dev); + struct macb_queue *q = &lp->queues[0]; struct macb_dma_desc *desc; unsigned char *p_recv; struct sk_buff *skb; unsigned int pktlen; - desc = macb_rx_desc(lp, lp->rx_tail); + desc = macb_rx_desc(q, q->rx_tail); while (desc->addr & MACB_BIT(RX_USED)) { - p_recv = lp->rx_buffers + lp->rx_tail * AT91ETHER_MAX_RBUFF_SZ; + p_recv = q->rx_buffers + q->rx_tail * AT91ETHER_MAX_RBUFF_SZ; pktlen = MACB_BF(RX_FRMLEN, desc->ctrl); skb = netdev_alloc_skb(dev, pktlen + 2); if (skb) { @@ -3163,12 +3572,12 @@ static void at91ether_rx(struct net_device *dev) desc->addr &= ~MACB_BIT(RX_USED); /* wrap after last buffer */ - if (lp->rx_tail == AT91ETHER_MAX_RX_DESCR - 1) - lp->rx_tail = 0; + if (q->rx_tail == AT91ETHER_MAX_RX_DESCR - 1) + q->rx_tail = 0; else - lp->rx_tail++; + q->rx_tail++; - desc = macb_rx_desc(lp, lp->rx_tail); + desc = macb_rx_desc(q, q->rx_tail); } } diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 462d0ce51240..efb9333c7cf8 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -18,6 +18,7 @@ #include <linux/netdevice.h> #include <linux/ethtool.h> +#include <linux/net_tstamp.h> #include "enic_res.h" #include "enic.h" @@ -578,6 +579,16 @@ static int enic_set_rxfh(struct net_device *netdev, const u32 *indir, return __enic_set_rsskey(enic); } +static int enic_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + + return 0; +} + static const struct ethtool_ops enic_ethtool_ops = { .get_drvinfo = enic_get_drvinfo, .get_msglevel = enic_get_msglevel, @@ -597,6 +608,7 @@ static const struct ethtool_ops enic_ethtool_ops = { .get_rxfh = enic_get_rxfh, 
.set_rxfh = enic_set_rxfh, .get_link_ksettings = enic_get_ksettings, + .get_ts_info = enic_get_ts_info, }; void enic_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index e130fb757e7b..d98676e43e03 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -856,6 +856,7 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb, if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS) netif_tx_stop_queue(txq); + skb_tx_timestamp(skb); if (!skb->xmit_more || netif_xmit_stopped(txq)) vnic_wq_doorbell(wq); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index a73600dceb8b..a60a378b8b29 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -88,6 +88,7 @@ do { \ #define CPSW_VERSION_4 0x190112 #define HOST_PORT_NUM 0 +#define CPSW_ALE_PORTS_NUM 3 #define SLIVER_SIZE 0x40 #define CPSW1_HOST_PORT_OFFSET 0x028 @@ -352,6 +353,27 @@ struct cpsw_hw_stats { u32 rxdmaoverruns; }; +struct cpsw_slave_data { + struct device_node *phy_node; + char phy_id[MII_BUS_ID_SIZE]; + int phy_if; + u8 mac_addr[ETH_ALEN]; + u16 dual_emac_res_vlan; /* Reserved VLAN for DualEMAC */ +}; + +struct cpsw_platform_data { + struct cpsw_slave_data *slave_data; + u32 ss_reg_ofs; /* Subsystem control register offset */ + u32 channels; /* number of cpdma channels (symmetric) */ + u32 slaves; /* number of slave cpgmac ports */ + u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */ + u32 ale_entries; /* ale table size */ + u32 bd_ram_size; /*buffer descriptor ram size */ + u32 mac_control; /* Mac control register */ + u16 default_vlan; /* Def VLAN for ALE lookup in VLAN aware mode*/ + bool dual_emac; /* Enable Dual EMAC mode */ +}; + struct cpsw_slave { void __iomem *regs; struct cpsw_sliver_regs __iomem *sliver; @@ -365,12 +387,12 @@ struct cpsw_slave { static inline u32 
slave_read(struct cpsw_slave *slave, u32 offset) { - return __raw_readl(slave->regs + offset); + return readl_relaxed(slave->regs + offset); } static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) { - __raw_writel(val, slave->regs + offset); + writel_relaxed(val, slave->regs + offset); } struct cpsw_vector { @@ -660,8 +682,8 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) static void cpsw_intr_enable(struct cpsw_common *cpsw) { - __raw_writel(0xFF, &cpsw->wr_regs->tx_en); - __raw_writel(0xFF, &cpsw->wr_regs->rx_en); + writel_relaxed(0xFF, &cpsw->wr_regs->tx_en); + writel_relaxed(0xFF, &cpsw->wr_regs->rx_en); cpdma_ctlr_int_ctrl(cpsw->dma, true); return; @@ -669,8 +691,8 @@ static void cpsw_intr_enable(struct cpsw_common *cpsw) static void cpsw_intr_disable(struct cpsw_common *cpsw) { - __raw_writel(0, &cpsw->wr_regs->tx_en); - __raw_writel(0, &cpsw->wr_regs->rx_en); + writel_relaxed(0, &cpsw->wr_regs->tx_en); + writel_relaxed(0, &cpsw->wr_regs->rx_en); cpdma_ctlr_int_ctrl(cpsw->dma, false); return; @@ -949,18 +971,14 @@ static inline void soft_reset(const char *module, void __iomem *reg) { unsigned long timeout = jiffies + HZ; - __raw_writel(1, reg); + writel_relaxed(1, reg); do { cpu_relax(); - } while ((__raw_readl(reg) & 1) && time_after(timeout, jiffies)); + } while ((readl_relaxed(reg) & 1) && time_after(timeout, jiffies)); - WARN(__raw_readl(reg) & 1, "failed to soft-reset %s\n", module); + WARN(readl_relaxed(reg) & 1, "failed to soft-reset %s\n", module); } -#define mac_hi(mac) (((mac)[0] << 0) | ((mac)[1] << 8) | \ - ((mac)[2] << 16) | ((mac)[3] << 24)) -#define mac_lo(mac) (((mac)[4] << 0) | ((mac)[5] << 8)) - static void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv) { @@ -1015,7 +1033,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, if (mac_control != slave->mac_control) { phy_print_status(phy); - __raw_writel(mac_control, &slave->sliver->mac_control); + writel_relaxed(mac_control, 
&slave->sliver->mac_control); } slave->mac_control = mac_control; @@ -1278,7 +1296,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) soft_reset_slave(slave); /* setup priority mapping */ - __raw_writel(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map); + writel_relaxed(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map); switch (cpsw->version) { case CPSW_VERSION_1: @@ -1304,7 +1322,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } /* setup max packet size, and mac address */ - __raw_writel(cpsw->rx_packet_max, &slave->sliver->rx_maxlen); + writel_relaxed(cpsw->rx_packet_max, &slave->sliver->rx_maxlen); cpsw_set_slave_mac(slave, priv); slave->mac_control = 0; /* no link yet */ @@ -1395,9 +1413,9 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) writel(fifo_mode, &cpsw->host_port_regs->tx_in_ctl); /* setup host port priority mapping */ - __raw_writel(CPDMA_TX_PRIORITY_MAP, - &cpsw->host_port_regs->cpdma_tx_pri_map); - __raw_writel(0, &cpsw->host_port_regs->cpdma_rx_chan_map); + writel_relaxed(CPDMA_TX_PRIORITY_MAP, + &cpsw->host_port_regs->cpdma_tx_pri_map); + writel_relaxed(0, &cpsw->host_port_regs->cpdma_rx_chan_map); cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); @@ -1514,10 +1532,10 @@ static int cpsw_ndo_open(struct net_device *ndev) /* initialize shared resources for every ndev */ if (!cpsw->usage_count) { /* disable priority elevation */ - __raw_writel(0, &cpsw->regs->ptype); + writel_relaxed(0, &cpsw->regs->ptype); /* enable statistics collection only on all ports */ - __raw_writel(0x7, &cpsw->regs->stat_port_en); + writel_relaxed(0x7, &cpsw->regs->stat_port_en); /* Enable internal fifo flow control */ writel(0x7, &cpsw->regs->flow_control); @@ -1701,7 +1719,7 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE); slave_write(slave, ctrl, CPSW2_CONTROL); - __raw_writel(ETH_P_1588, 
&cpsw->regs->ts_ltype); + writel_relaxed(ETH_P_1588, &cpsw->regs->ts_ltype); } static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) @@ -2298,7 +2316,6 @@ static int cpsw_check_ch_settings(struct cpsw_common *cpsw, static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx) { - int (*poll)(struct napi_struct *, int); struct cpsw_common *cpsw = priv->cpsw; void (*handler)(void *, int, int); struct netdev_queue *queue; @@ -2309,12 +2326,10 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx) ch = &cpsw->rx_ch_num; vec = cpsw->rxv; handler = cpsw_rx_handler; - poll = cpsw_rx_poll; } else { ch = &cpsw->tx_ch_num; vec = cpsw->txv; handler = cpsw_tx_handler; - poll = cpsw_tx_poll; } while (*ch < ch_num) { @@ -3060,7 +3075,7 @@ static int cpsw_probe(struct platform_device *pdev) ale_params.dev = &pdev->dev; ale_params.ale_ageout = ale_ageout; ale_params.ale_entries = data->ale_entries; - ale_params.ale_ports = data->slaves; + ale_params.ale_ports = CPSW_ALE_PORTS_NUM; cpsw->ale = cpsw_ale_create(&ale_params); if (!cpsw->ale) { @@ -3072,14 +3087,14 @@ static int cpsw_probe(struct platform_device *pdev) cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpsw->dev->of_node); if (IS_ERR(cpsw->cpts)) { ret = PTR_ERR(cpsw->cpts); - goto clean_ale_ret; + goto clean_dma_ret; } ndev->irq = platform_get_irq(pdev, 1); if (ndev->irq < 0) { dev_err(priv->dev, "error getting irq resource\n"); ret = ndev->irq; - goto clean_ale_ret; + goto clean_dma_ret; } of_id = of_match_device(cpsw_of_mtable, &pdev->dev); @@ -3103,7 +3118,7 @@ static int cpsw_probe(struct platform_device *pdev) if (ret) { dev_err(priv->dev, "error registering net device\n"); ret = -ENODEV; - goto clean_ale_ret; + goto clean_dma_ret; } if (cpsw->data.dual_emac) { @@ -3126,7 +3141,7 @@ static int cpsw_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 1); if (irq < 0) { ret = irq; - goto clean_ale_ret; + goto clean_dma_ret; } 
cpsw->irqs_table[0] = irq; @@ -3134,14 +3149,14 @@ static int cpsw_probe(struct platform_device *pdev) 0, dev_name(&pdev->dev), cpsw); if (ret < 0) { dev_err(priv->dev, "error attaching irq (%d)\n", ret); - goto clean_ale_ret; + goto clean_dma_ret; } /* TX IRQ */ irq = platform_get_irq(pdev, 2); if (irq < 0) { ret = irq; - goto clean_ale_ret; + goto clean_dma_ret; } cpsw->irqs_table[1] = irq; @@ -3149,7 +3164,7 @@ static int cpsw_probe(struct platform_device *pdev) 0, dev_name(&pdev->dev), cpsw); if (ret < 0) { dev_err(priv->dev, "error attaching irq (%d)\n", ret); - goto clean_ale_ret; + goto clean_dma_ret; } cpsw_notice(priv, probe, @@ -3162,8 +3177,6 @@ static int cpsw_probe(struct platform_device *pdev) clean_unregister_netdev_ret: unregister_netdev(ndev); -clean_ale_ret: - cpsw_ale_destroy(cpsw->ale); clean_dma_ret: cpdma_ctlr_destroy(cpsw->dma); clean_dt_ret: @@ -3193,7 +3206,6 @@ static int cpsw_remove(struct platform_device *pdev) unregister_netdev(ndev); cpts_release(cpsw->cpts); - cpsw_ale_destroy(cpsw->ale); cpdma_ctlr_destroy(cpsw->dma); cpsw_remove_dt(pdev); pm_runtime_put_sync(&pdev->dev); diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h index 6c3037aa2cd3..cf111db3dc27 100644 --- a/drivers/net/ethernet/ti/cpsw.h +++ b/drivers/net/ethernet/ti/cpsw.h @@ -17,26 +17,9 @@ #include <linux/if_ether.h> #include <linux/phy.h> -struct cpsw_slave_data { - struct device_node *phy_node; - char phy_id[MII_BUS_ID_SIZE]; - int phy_if; - u8 mac_addr[ETH_ALEN]; - u16 dual_emac_res_vlan; /* Reserved VLAN for DualEMAC */ -}; - -struct cpsw_platform_data { - struct cpsw_slave_data *slave_data; - u32 ss_reg_ofs; /* Subsystem control register offset */ - u32 channels; /* number of cpdma channels (symmetric) */ - u32 slaves; /* number of slave cpgmac ports */ - u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */ - u32 ale_entries; /* ale table size */ - u32 bd_ram_size; /*buffer descriptor ram size */ - u32 mac_control; /* Mac 
control register */ - u16 default_vlan; /* Def VLAN for ALE lookup in VLAN aware mode*/ - bool dual_emac; /* Enable Dual EMAC mode */ -}; +#define mac_hi(mac) (((mac)[0] << 0) | ((mac)[1] << 8) | \ + ((mac)[2] << 16) | ((mac)[3] << 24)) +#define mac_lo(mac) (((mac)[4] << 0) | ((mac)[5] << 8)) void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave); int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr); diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index b432a75fb874..93dc05c194d3 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -150,11 +150,11 @@ static int cpsw_ale_read(struct cpsw_ale *ale, int idx, u32 *ale_entry) WARN_ON(idx > ale->params.ale_entries); - __raw_writel(idx, ale->params.ale_regs + ALE_TABLE_CONTROL); + writel_relaxed(idx, ale->params.ale_regs + ALE_TABLE_CONTROL); for (i = 0; i < ALE_ENTRY_WORDS; i++) - ale_entry[i] = __raw_readl(ale->params.ale_regs + - ALE_TABLE + 4 * i); + ale_entry[i] = readl_relaxed(ale->params.ale_regs + + ALE_TABLE + 4 * i); return idx; } @@ -166,11 +166,11 @@ static int cpsw_ale_write(struct cpsw_ale *ale, int idx, u32 *ale_entry) WARN_ON(idx > ale->params.ale_entries); for (i = 0; i < ALE_ENTRY_WORDS; i++) - __raw_writel(ale_entry[i], ale->params.ale_regs + - ALE_TABLE + 4 * i); + writel_relaxed(ale_entry[i], ale->params.ale_regs + + ALE_TABLE + 4 * i); - __raw_writel(idx | ALE_TABLE_WRITE, ale->params.ale_regs + - ALE_TABLE_CONTROL); + writel_relaxed(idx | ALE_TABLE_WRITE, ale->params.ale_regs + + ALE_TABLE_CONTROL); return idx; } @@ -723,7 +723,7 @@ int cpsw_ale_control_set(struct cpsw_ale *ale, int port, int control, if (info->port_offset == 0 && info->port_shift == 0) port = 0; /* global, port is a dont care */ - if (port < 0 || port > ale->params.ale_ports) + if (port < 0 || port >= ale->params.ale_ports) return -EINVAL; mask = BITMASK(info->bits); @@ -733,9 +733,9 @@ int cpsw_ale_control_set(struct 
cpsw_ale *ale, int port, int control, offset = info->offset + (port * info->port_offset); shift = info->shift + (port * info->port_shift); - tmp = __raw_readl(ale->params.ale_regs + offset); + tmp = readl_relaxed(ale->params.ale_regs + offset); tmp = (tmp & ~(mask << shift)) | (value << shift); - __raw_writel(tmp, ale->params.ale_regs + offset); + writel_relaxed(tmp, ale->params.ale_regs + offset); return 0; } @@ -754,13 +754,13 @@ int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control) if (info->port_offset == 0 && info->port_shift == 0) port = 0; /* global, port is a dont care */ - if (port < 0 || port > ale->params.ale_ports) + if (port < 0 || port >= ale->params.ale_ports) return -EINVAL; offset = info->offset + (port * info->port_offset); shift = info->shift + (port * info->port_shift); - tmp = __raw_readl(ale->params.ale_regs + offset) >> shift; + tmp = readl_relaxed(ale->params.ale_regs + offset) >> shift; return tmp & BITMASK(info->bits); } EXPORT_SYMBOL_GPL(cpsw_ale_control_get); @@ -779,9 +779,37 @@ static void cpsw_ale_timer(struct timer_list *t) void cpsw_ale_start(struct cpsw_ale *ale) { + cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1); + cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); + + timer_setup(&ale->timer, cpsw_ale_timer, 0); + if (ale->ageout) { + ale->timer.expires = jiffies + ale->ageout; + add_timer(&ale->timer); + } +} +EXPORT_SYMBOL_GPL(cpsw_ale_start); + +void cpsw_ale_stop(struct cpsw_ale *ale) +{ + del_timer_sync(&ale->timer); + cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0); +} +EXPORT_SYMBOL_GPL(cpsw_ale_stop); + +struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) +{ + struct cpsw_ale *ale; u32 rev, ale_entries; - rev = __raw_readl(ale->params.ale_regs + ALE_IDVER); + ale = devm_kzalloc(params->dev, sizeof(*ale), GFP_KERNEL); + if (!ale) + return NULL; + + ale->params = *params; + ale->ageout = ale->params.ale_ageout * HZ; + + rev = readl_relaxed(ale->params.ale_regs + ALE_IDVER); if (!ale->params.major_ver_mask) 
ale->params.major_ver_mask = 0xff; ale->version = @@ -793,8 +821,8 @@ void cpsw_ale_start(struct cpsw_ale *ale) if (!ale->params.ale_entries) { ale_entries = - __raw_readl(ale->params.ale_regs + ALE_STATUS) & - ALE_STATUS_SIZE_MASK; + readl_relaxed(ale->params.ale_regs + ALE_STATUS) & + ALE_STATUS_SIZE_MASK; /* ALE available on newer NetCP switches has introduced * a register, ALE_STATUS, to indicate the size of ALE * table which shows the size as a multiple of 1024 entries. @@ -816,9 +844,9 @@ void cpsw_ale_start(struct cpsw_ale *ale) "ALE Table size %ld\n", ale->params.ale_entries); /* set default bits for existing h/w */ - ale->port_mask_bits = 3; - ale->port_num_bits = 2; - ale->vlan_field_bits = 3; + ale->port_mask_bits = ale->params.ale_ports; + ale->port_num_bits = order_base_2(ale->params.ale_ports); + ale->vlan_field_bits = ale->params.ale_ports; /* Set defaults override for ALE on NetCP NU switch and for version * 1R3 @@ -847,57 +875,12 @@ void cpsw_ale_start(struct cpsw_ale *ale) ale_controls[ALE_PORT_UNTAGGED_EGRESS].shift = 0; ale_controls[ALE_PORT_UNTAGGED_EGRESS].offset = ALE_UNKNOWNVLAN_FORCE_UNTAG_EGRESS; - ale->port_mask_bits = ale->params.ale_ports; - ale->port_num_bits = ale->params.ale_ports - 1; - ale->vlan_field_bits = ale->params.ale_ports; - } else if (ale->version == ALE_VERSION_1R3) { - ale->port_mask_bits = ale->params.ale_ports; - ale->port_num_bits = 3; - ale->vlan_field_bits = ale->params.ale_ports; } - cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1); - cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); - - timer_setup(&ale->timer, cpsw_ale_timer, 0); - if (ale->ageout) { - ale->timer.expires = jiffies + ale->ageout; - add_timer(&ale->timer); - } -} -EXPORT_SYMBOL_GPL(cpsw_ale_start); - -void cpsw_ale_stop(struct cpsw_ale *ale) -{ - del_timer_sync(&ale->timer); -} -EXPORT_SYMBOL_GPL(cpsw_ale_stop); - -struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) -{ - struct cpsw_ale *ale; - - ale = kzalloc(sizeof(*ale), GFP_KERNEL); - if 
(!ale) - return NULL; - - ale->params = *params; - ale->ageout = ale->params.ale_ageout * HZ; - return ale; } EXPORT_SYMBOL_GPL(cpsw_ale_create); -int cpsw_ale_destroy(struct cpsw_ale *ale) -{ - if (!ale) - return -EINVAL; - cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0); - kfree(ale); - return 0; -} -EXPORT_SYMBOL_GPL(cpsw_ale_destroy); - void cpsw_ale_dump(struct cpsw_ale *ale, u32 *data) { int i; diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index 25d24e8d0904..d4fe9016429b 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -100,7 +100,6 @@ enum cpsw_ale_port_state { #define ALE_ENTRY_WORDS DIV_ROUND_UP(ALE_ENTRY_BITS, 32) struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params); -int cpsw_ale_destroy(struct cpsw_ale *ale); void cpsw_ale_start(struct cpsw_ale *ale); void cpsw_ale_stop(struct cpsw_ale *ale); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index e831c49713ee..56dbc0b9fedc 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -27,6 +27,7 @@ #include <linux/net_tstamp.h> #include <linux/ethtool.h> +#include "cpsw.h" #include "cpsw_ale.h" #include "netcp.h" #include "cpts.h" @@ -2047,10 +2048,6 @@ static const struct ethtool_ops keystone_ethtool_ops = { .get_ts_info = keystone_get_ts_info, }; -#define mac_hi(mac) (((mac)[0] << 0) | ((mac)[1] << 8) | \ - ((mac)[2] << 16) | ((mac)[3] << 24)) -#define mac_lo(mac) (((mac)[4] << 0) | ((mac)[5] << 8)) - static void gbe_set_slave_mac(struct gbe_slave *slave, struct gbe_intf *gbe_intf) { @@ -3692,7 +3689,6 @@ static int gbe_remove(struct netcp_device *netcp_device, void *inst_priv) del_timer_sync(&gbe_dev->timer); cpts_release(gbe_dev->cpts); cpsw_ale_stop(gbe_dev->ale); - cpsw_ale_destroy(gbe_dev->ale); netcp_txpipe_close(&gbe_dev->tx_pipe); free_secondary_ports(gbe_dev); diff --git a/drivers/net/hyperv/hyperv_net.h 
b/drivers/net/hyperv/hyperv_net.h index 88ddfb92122b..3d940c67ea94 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -146,7 +146,6 @@ struct hv_netvsc_packet { struct netvsc_device_info { unsigned char mac_adr[ETH_ALEN]; - int ring_size; u32 num_chn; u32 send_sections; u32 recv_sections; @@ -188,6 +187,9 @@ struct rndis_message; struct netvsc_device; struct net_device_context; +extern u32 netvsc_ring_bytes; +extern struct reciprocal_value netvsc_ring_reciprocal; + struct netvsc_device *netvsc_device_add(struct hv_device *device, const struct netvsc_device_info *info); int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx); @@ -804,8 +806,6 @@ struct netvsc_device { struct rndis_device *extension; - int ring_size; - u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ @@ -1425,32 +1425,6 @@ struct rndis_message { (sizeof(msg) + (sizeof(struct rndis_message) - \ sizeof(union rndis_message_container))) -/* get pointer to info buffer with message pointer */ -#define MESSAGE_TO_INFO_BUFFER(msg) \ - (((unsigned char *)(msg)) + msg->info_buf_offset) - -/* get pointer to status buffer with message pointer */ -#define MESSAGE_TO_STATUS_BUFFER(msg) \ - (((unsigned char *)(msg)) + msg->status_buf_offset) - -/* get pointer to OOBD buffer with message pointer */ -#define MESSAGE_TO_OOBD_BUFFER(msg) \ - (((unsigned char *)(msg)) + msg->oob_data_offset) - -/* get pointer to data buffer with message pointer */ -#define MESSAGE_TO_DATA_BUFFER(msg) \ - (((unsigned char *)(msg)) + msg->per_pkt_info_offset) - -/* get pointer to contained message from NDIS_MESSAGE pointer */ -#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_msg) \ - ((void *) &rndis_msg->msg) - -/* get pointer to contained message from NDIS_MESSAGE pointer */ -#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_msg) \ - ((void *) rndis_msg) - - - #define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \ 
sizeof(union rndis_message_container)) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index bfc79698b8f4..e4bcd202a56a 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -31,6 +31,7 @@ #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <linux/prefetch.h> +#include <linux/reciprocal_div.h> #include <asm/sync_bitops.h> @@ -588,14 +589,11 @@ void netvsc_device_remove(struct hv_device *device) * Get the percentage of available bytes to write in the ring. * The return value is in range from 0 to 100. */ -static inline u32 hv_ringbuf_avail_percent( - struct hv_ring_buffer_info *ring_info) +static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info *ring_info) { - u32 avail_read, avail_write; + u32 avail_write = hv_get_bytes_to_write(ring_info); - hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write); - - return avail_write * 100 / ring_info->ring_datasize; + return reciprocal_divide(avail_write * 100, netvsc_ring_reciprocal); } static inline void netvsc_free_send_slot(struct netvsc_device *net_device, @@ -712,11 +710,12 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, int i; u32 msg_size = 0; u32 padding = 0; - u32 remain = packet->total_data_buflen % net_device->pkt_align; u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : packet->page_buf_cnt; + u32 remain; /* Add padding */ + remain = packet->total_data_buflen & (net_device->pkt_align - 1); if (skb->xmit_more && remain && !packet->cp_partial) { padding = net_device->pkt_align - remain; rndis_msg->msg_len += padding; @@ -848,7 +847,6 @@ int netvsc_send(struct net_device_context *ndev_ctx, struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; struct sk_buff *msd_skb = NULL; bool try_batch; - bool xmit_more = (skb != NULL) ? skb->xmit_more : false; /* If device is rescinded, return error and packet will get dropped. 
*/ if (unlikely(!net_device || net_device->destroy)) @@ -922,7 +920,7 @@ int netvsc_send(struct net_device_context *ndev_ctx, if (msdp->skb) dev_consume_skb_any(msdp->skb); - if (xmit_more && !packet->cp_partial) { + if (skb->xmit_more && !packet->cp_partial) { msdp->skb = skb; msdp->pkt = packet; msdp->count++; @@ -1249,7 +1247,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, const struct netvsc_device_info *device_info) { int i, ret = 0; - int ring_size = device_info->ring_size; struct netvsc_device *net_device; struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); @@ -1261,8 +1258,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) net_device_ctx->tx_table[i] = 0; - net_device->ring_size = ring_size; - /* Because the device uses NAPI, all the interrupt batching and * control is done via Net softirq, not the channel handling */ @@ -1289,10 +1284,9 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netvsc_poll, NAPI_POLL_WEIGHT); /* Open the channel */ - ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, - ring_size * PAGE_SIZE, NULL, 0, - netvsc_channel_cb, - net_device->chan_table); + ret = vmbus_open(device->channel, netvsc_ring_bytes, + netvsc_ring_bytes, NULL, 0, + netvsc_channel_cb, net_device->chan_table); if (ret != 0) { netif_napi_del(&net_device->chan_table[0].napi); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5129647d420c..dc70de674ca9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/rtnetlink.h> #include <linux/netpoll.h> +#include <linux/reciprocal_div.h> #include <net/arp.h> #include <net/route.h> @@ -54,9 +55,11 @@ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) -static int ring_size = 128; -module_param(ring_size, int, S_IRUGO); +static 
unsigned int ring_size __ro_after_init = 128; +module_param(ring_size, uint, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +unsigned int netvsc_ring_bytes __ro_after_init; +struct reciprocal_value netvsc_ring_reciprocal __ro_after_init; static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | @@ -174,17 +177,15 @@ out: return ret; } -static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, - int pkt_type) +static inline void *init_ppi_data(struct rndis_message *msg, + u32 ppi_size, u32 pkt_type) { - struct rndis_packet *rndis_pkt; + struct rndis_packet *rndis_pkt = &msg->msg.pkt; struct rndis_per_packet_info *ppi; - rndis_pkt = &msg->msg.pkt; rndis_pkt->data_offset += ppi_size; - - ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt + - rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len); + ppi = (void *)rndis_pkt + rndis_pkt->per_pkt_info_offset + + rndis_pkt->per_pkt_info_len; ppi->size = ppi_size; ppi->type = pkt_type; @@ -192,7 +193,7 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, rndis_pkt->per_pkt_info_len += ppi_size; - return ppi; + return ppi + 1; } /* Azure hosts don't support non-TCP port numbers in hashing for fragmented @@ -469,10 +470,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) int ret; unsigned int num_data_pgs; struct rndis_message *rndis_msg; - struct rndis_packet *rndis_pkt; struct net_device *vf_netdev; u32 rndis_msg_size; - struct rndis_per_packet_info *ppi; u32 hash; struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; @@ -527,34 +526,36 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) rndis_msg = (struct rndis_message *)skb->head; - memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); - /* Add the rndis header */ rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; rndis_msg->msg_len = packet->total_data_buflen; - rndis_pkt = &rndis_msg->msg.pkt; - rndis_pkt->data_offset = sizeof(struct 
rndis_packet); - rndis_pkt->data_len = packet->total_data_buflen; - rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); + + rndis_msg->msg.pkt = (struct rndis_packet) { + .data_offset = sizeof(struct rndis_packet), + .data_len = packet->total_data_buflen, + .per_pkt_info_offset = sizeof(struct rndis_packet), + }; rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); hash = skb_get_hash_raw(skb); if (hash != 0 && net->real_num_tx_queues > 1) { + u32 *hash_info; + rndis_msg_size += NDIS_HASH_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, - NBL_HASH_VALUE); - *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; + hash_info = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, + NBL_HASH_VALUE); + *hash_info = hash; } if (skb_vlan_tag_present(skb)) { struct ndis_pkt_8021q_info *vlan; rndis_msg_size += NDIS_VLAN_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, - IEEE_8021Q_INFO); + vlan = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, + IEEE_8021Q_INFO); - vlan = (void *)ppi + ppi->ppi_offset; + vlan->value = 0; vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; @@ -564,11 +565,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct ndis_tcp_lso_info *lso_info; rndis_msg_size += NDIS_LSO_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, - TCP_LARGESEND_PKTINFO); - - lso_info = (void *)ppi + ppi->ppi_offset; + lso_info = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, + TCP_LARGESEND_PKTINFO); + lso_info->value = 0; lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; if (skb->protocol == htons(ETH_P_IP)) { lso_info->lso_v2_transmit.ip_version = @@ -593,12 +593,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct ndis_tcp_ip_checksum_info *csum_info; rndis_msg_size += NDIS_CSUM_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, - TCPIP_CHKSUM_PKTINFO); - - csum_info = (struct 
ndis_tcp_ip_checksum_info *)((void *)ppi + - ppi->ppi_offset); + csum_info = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, + TCPIP_CHKSUM_PKTINFO); + csum_info->value = 0; csum_info->transmit.tcp_header_offset = skb_transport_offset(skb); if (skb->protocol == htons(ETH_P_IP)) { @@ -860,7 +858,6 @@ static int netvsc_set_channels(struct net_device *net, memset(&device_info, 0, sizeof(device_info)); device_info.num_chn = count; - device_info.ring_size = ring_size; device_info.send_sections = nvdev->send_section_cnt; device_info.send_section_size = nvdev->send_section_size; device_info.recv_sections = nvdev->recv_section_cnt; @@ -975,7 +972,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) rndis_filter_close(nvdev); memset(&device_info, 0, sizeof(device_info)); - device_info.ring_size = ring_size; device_info.num_chn = nvdev->num_chn; device_info.send_sections = nvdev->send_section_cnt; device_info.send_section_size = nvdev->send_section_size; @@ -1539,7 +1535,6 @@ static int netvsc_set_ringparam(struct net_device *ndev, memset(&device_info, 0, sizeof(device_info)); device_info.num_chn = nvdev->num_chn; - device_info.ring_size = ring_size; device_info.send_sections = new_tx; device_info.send_section_size = nvdev->send_section_size; device_info.recv_sections = new_rx; @@ -1995,7 +1990,6 @@ static int netvsc_probe(struct hv_device *dev, /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); - device_info.ring_size = ring_size; device_info.num_chn = VRSS_CHANNEL_DEFAULT; device_info.send_sections = NETVSC_DEFAULT_TX; device_info.send_section_size = NETVSC_SEND_SECTION_SIZE; @@ -2158,11 +2152,13 @@ static int __init netvsc_drv_init(void) if (ring_size < RING_SIZE_MIN) { ring_size = RING_SIZE_MIN; - pr_info("Increased ring_size to %d (min allowed)\n", + pr_info("Increased ring_size to %u (min allowed)\n", ring_size); } - ret = vmbus_driver_register(&netvsc_drv); + netvsc_ring_bytes = ring_size * PAGE_SIZE; + 
netvsc_ring_reciprocal = reciprocal_value(netvsc_ring_bytes); + ret = vmbus_driver_register(&netvsc_drv); if (ret) return ret; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 7b637c7dd1e5..673492063307 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1040,8 +1040,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) /* Set the channel before opening.*/ nvchan->channel = new_sc; - ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, - nvscdev->ring_size * PAGE_SIZE, NULL, 0, + ret = vmbus_open(new_sc, netvsc_ring_bytes, + netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, nvchan); if (ret == 0) napi_enable(&nvchan->napi); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 30cb803e2fe5..2469df118fbf 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -850,6 +850,19 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) return ipvlan_del_addr(ipvlan, ip6_addr, true); } +static bool ipvlan_is_valid_dev(const struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + if (!netif_is_ipvlan(dev)) + return false; + + if (!ipvlan || !ipvlan->port) + return false; + + return true; +} + static int ipvlan_addr6_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -857,10 +870,7 @@ static int ipvlan_addr6_event(struct notifier_block *unused, struct net_device *dev = (struct net_device *)if6->idev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); - if (!netif_is_ipvlan(dev)) - return NOTIFY_DONE; - - if (!ipvlan || !ipvlan->port) + if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { @@ -888,10 +898,7 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused, if (in_softirq()) return NOTIFY_DONE; - if (!netif_is_ipvlan(dev)) - return NOTIFY_DONE; - - if (!ipvlan || !ipvlan->port) + if (!ipvlan_is_valid_dev(dev)) return 
NOTIFY_DONE; switch (event) { @@ -932,10 +939,7 @@ static int ipvlan_addr4_event(struct notifier_block *unused, struct ipvl_dev *ipvlan = netdev_priv(dev); struct in_addr ip4_addr; - if (!netif_is_ipvlan(dev)) - return NOTIFY_DONE; - - if (!ipvlan || !ipvlan->port) + if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { @@ -961,10 +965,7 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused, struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); - if (!netif_is_ipvlan(dev)) - return NOTIFY_DONE; - - if (!ipvlan || !ipvlan->port) + if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { diff --git a/drivers/net/phy/amd.c b/drivers/net/phy/amd.c index 18141c022b13..6fe5dc9201d0 100644 --- a/drivers/net/phy/amd.c +++ b/drivers/net/phy/amd.c @@ -68,8 +68,6 @@ static struct phy_driver am79c_driver[] = { { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = am79c_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = am79c_ack_interrupt, .config_intr = am79c_config_intr, } }; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 5f93e6add563..de7dd6566df7 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -408,8 +408,6 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, }, { @@ -426,8 +424,6 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, }, { @@ -443,8 +439,6 @@ static struct phy_driver at803x_driver[] = { 
.resume = at803x_resume, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .aneg_done = at803x_aneg_done, .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c index 3fe8cc5c177e..6838129839ca 100644 --- a/drivers/net/phy/bcm-cygnus.c +++ b/drivers/net/phy/bcm-cygnus.c @@ -136,8 +136,6 @@ static struct phy_driver bcm_cygnus_phy_driver[] = { .name = "Broadcom Cygnus PHY", .features = PHY_GBIT_FEATURES, .config_init = bcm_cygnus_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, .suspend = genphy_suspend, diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index b0492ef2cdaa..cf14613745c9 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -69,8 +69,6 @@ static struct phy_driver bcm63xx_driver[] = { .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), .flags = PHY_HAS_INTERRUPT | PHY_IS_INTERNAL, .config_init = bcm63xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm63xx_config_intr, }, { @@ -81,8 +79,6 @@ static struct phy_driver bcm63xx_driver[] = { .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), .flags = PHY_HAS_INTERRUPT | PHY_IS_INTERNAL, .config_init = bcm63xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm63xx_config_intr, } }; diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 8b33f688ac8a..421feb8f92fe 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -611,8 +611,6 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .features = PHY_GBIT_FEATURES, \ .flags = PHY_IS_INTERNAL, \ .config_init = 
bcm7xxx_28nm_config_init, \ - .config_aneg = genphy_config_aneg, \ - .read_status = genphy_read_status, \ .resume = bcm7xxx_28nm_resume, \ .get_tunable = bcm7xxx_28nm_get_tunable, \ .set_tunable = bcm7xxx_28nm_set_tunable, \ @@ -630,8 +628,6 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .features = PHY_BASIC_FEATURES, \ .flags = PHY_IS_INTERNAL, \ .config_init = bcm7xxx_28nm_ephy_config_init, \ - .config_aneg = genphy_config_aneg, \ - .read_status = genphy_read_status, \ .resume = bcm7xxx_28nm_ephy_resume, \ .get_sset_count = bcm_phy_get_sset_count, \ .get_strings = bcm_phy_get_strings, \ @@ -647,8 +643,6 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .features = PHY_BASIC_FEATURES, \ .flags = PHY_IS_INTERNAL, \ .config_init = bcm7xxx_config_init, \ - .config_aneg = genphy_config_aneg, \ - .read_status = genphy_read_status, \ .suspend = bcm7xxx_suspend, \ .resume = bcm7xxx_config_init, \ } diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index d7ed69deabfb..a8f69c5777bc 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -548,8 +548,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -559,8 +557,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -570,8 +566,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = 
bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -581,8 +575,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -592,8 +584,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -603,8 +593,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -614,8 +602,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -626,7 +612,6 @@ static struct phy_driver broadcom_drivers[] = { .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -637,7 +622,6 @@ static struct phy_driver broadcom_drivers[] = { .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -647,7 +631,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, 
.config_init = bcm5482_config_init, - .config_aneg = genphy_config_aneg, .read_status = bcm5482_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -658,8 +641,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -669,8 +650,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -680,8 +659,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { @@ -691,8 +668,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = brcm_fet_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = brcm_fet_ack_interrupt, .config_intr = brcm_fet_config_intr, }, { @@ -702,8 +677,6 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = brcm_fet_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = brcm_fet_ack_interrupt, .config_intr = brcm_fet_config_intr, } }; diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c index d339c1afea77..c05af00bf4b6 100644 --- a/drivers/net/phy/cicada.c +++ b/drivers/net/phy/cicada.c @@ -110,8 +110,6 @@ static struct phy_driver cis820x_driver[] = { 
.features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &cis820x_config_init, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &cis820x_ack_interrupt, .config_intr = &cis820x_config_intr, }, { @@ -121,8 +119,6 @@ static struct phy_driver cis820x_driver[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &cis820x_config_init, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &cis820x_ack_interrupt, .config_intr = &cis820x_config_intr, } }; diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c index e28913d9ea7e..5ee99b3b428c 100644 --- a/drivers/net/phy/davicom.c +++ b/drivers/net/phy/davicom.c @@ -153,7 +153,6 @@ static struct phy_driver dm91xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, }, { @@ -164,7 +163,6 @@ static struct phy_driver dm91xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, }, { @@ -175,7 +173,6 @@ static struct phy_driver dm91xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, }, { @@ -184,8 +181,6 @@ static struct phy_driver dm91xx_driver[] = { .phy_id_mask = 0x0ffffff0, .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, } }; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index cbd629822f04..654f42d00092 100644 --- 
a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1502,8 +1502,6 @@ static struct phy_driver dp83640_driver = { .probe = dp83640_probe, .remove = dp83640_remove, .config_init = dp83640_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = dp83640_ack_interrupt, .config_intr = dp83640_config_intr, .ts_info = dp83640_ts_info, diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 14335d14e9e4..6e8a2a4f3a6e 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -325,8 +325,6 @@ static struct phy_driver dp83822_driver[] = { .set_wol = dp83822_set_wol, .ack_interrupt = dp83822_ack_interrupt, .config_intr = dp83822_config_intr, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .suspend = dp83822_suspend, .resume = dp83822_resume, }, diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 3966d43c5146..cd09c3af2117 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -95,8 +95,6 @@ MODULE_DEVICE_TABLE(mdio, dp83848_tbl); .config_init = genphy_config_init, \ .suspend = genphy_suspend, \ .resume = genphy_resume, \ - .config_aneg = genphy_config_aneg, \ - .read_status = genphy_read_status, \ \ /* IRQ related */ \ .ack_interrupt = dp83848_ack_interrupt, \ diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index c1ab976cc800..ab58224f897f 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -324,8 +324,6 @@ static struct phy_driver dp83867_driver[] = { .ack_interrupt = dp83867_ack_interrupt, .config_intr = dp83867_config_intr, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, }, diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 567280a72241..791587a49215 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -227,8 +227,6 @@ static struct phy_driver icplus_driver[] = { 
.phy_id_mask = 0x0ffffff0, .features = PHY_GBIT_FEATURES, .config_init = &ip1001_config_init, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -239,8 +237,6 @@ static struct phy_driver icplus_driver[] = { .flags = PHY_HAS_INTERRUPT, .ack_interrupt = ip101a_g_ack_interrupt, .config_init = &ip101a_g_config_init, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, } }; diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c index 55f8c52dd2f1..a11f80cb5388 100644 --- a/drivers/net/phy/intel-xway.c +++ b/drivers/net/phy/intel-xway.c @@ -243,7 +243,6 @@ static struct phy_driver xway_gphy[] = { .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .config_aneg = xway_gphy14_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, .config_intr = xway_gphy_config_intr, @@ -257,7 +256,6 @@ static struct phy_driver xway_gphy[] = { .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .config_aneg = xway_gphy14_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, .config_intr = xway_gphy_config_intr, @@ -271,7 +269,6 @@ static struct phy_driver xway_gphy[] = { .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .config_aneg = xway_gphy14_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, .config_intr = xway_gphy_config_intr, @@ -285,7 +282,6 @@ static struct phy_driver xway_gphy[] = { .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .config_aneg = xway_gphy14_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, .config_intr = 
xway_gphy_config_intr, @@ -298,8 +294,6 @@ static struct phy_driver xway_gphy[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, .config_intr = xway_gphy_config_intr, @@ -312,8 +306,6 @@ static struct phy_driver xway_gphy[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, .config_intr = xway_gphy_config_intr, @@ -326,8 +318,6 @@ static struct phy_driver xway_gphy[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, .config_intr = xway_gphy_config_intr, @@ -340,8 +330,6 @@ static struct phy_driver xway_gphy[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, .config_intr = xway_gphy_config_intr, diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index 09d215177fff..c14b254b2879 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -259,8 +259,6 @@ static struct phy_driver lxt97x_driver[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = lxt970_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = lxt970_ack_interrupt, .config_intr = lxt970_config_intr, }, { @@ -269,8 +267,6 @@ static struct phy_driver lxt97x_driver[] = { .phy_id_mask = 
0xfffffff0, .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = lxt971_ack_interrupt, .config_intr = lxt971_config_intr, }, { @@ -290,7 +286,6 @@ static struct phy_driver lxt97x_driver[] = { .flags = 0, .probe = lxt973_probe, .config_aneg = lxt973_config_aneg, - .read_status = genphy_read_status, } }; module_phy_driver(lxt97x_driver); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 4d02b27df044..6dbb0f4c34eb 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1958,7 +1958,6 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = &marvell_config_init, .config_aneg = &m88e1101_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .resume = &genphy_resume, @@ -1976,7 +1975,6 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = &m88e1111_config_init, .config_aneg = &marvell_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .resume = &genphy_resume, @@ -2012,7 +2010,6 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = &m88e1118_config_init, .config_aneg = &m88e1118_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .resume = &genphy_resume, @@ -2070,7 +2067,6 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = &m88e1145_config_init, .config_aneg = &marvell_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .resume = &genphy_resume, @@ -2088,7 +2084,6 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = &m88e1149_config_init, .config_aneg = 
&m88e1118_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .resume = &genphy_resume, @@ -2106,7 +2101,6 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = &m88e1111_config_init, .config_aneg = &marvell_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .resume = &genphy_resume, @@ -2123,8 +2117,6 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1116r_config_init, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .resume = &genphy_resume, @@ -2200,7 +2192,6 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, - .config_aneg = &genphy_config_aneg, .config_init = &m88e3016_config_init, .aneg_done = &marvell_aneg_done, .read_status = &marvell_read_status, diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c index 1ea69b7585d9..401e3234be58 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -58,9 +58,7 @@ static struct phy_driver meson_gxl_phy[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_IS_INTERNAL, .config_init = meson_gxl_config_init, - .config_aneg = genphy_config_aneg, .aneg_done = genphy_aneg_done, - .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, }, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fdb43dd9b5cd..cf1c5b1ed508 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -799,8 +799,6 @@ static struct phy_driver ksphy_driver[] = { .flags = PHY_HAS_INTERRUPT, .driver_data = &ks8737_type, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = 
genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .suspend = genphy_suspend, @@ -814,8 +812,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz8021_type, .probe = kszphy_probe, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, @@ -832,8 +828,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz8021_type, .probe = kszphy_probe, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, @@ -851,7 +845,6 @@ static struct phy_driver ksphy_driver[] = { .probe = kszphy_probe, .config_init = ksz8041_config_init, .config_aneg = ksz8041_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, @@ -868,8 +861,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz8041_type, .probe = kszphy_probe, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, @@ -886,8 +877,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz8051_type, .probe = kszphy_probe, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, @@ -904,8 +893,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz8041_type, .probe = kszphy_probe, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status 
= genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, @@ -922,8 +909,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz8081_type, .probe = kszphy_probe, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, @@ -938,8 +923,6 @@ static struct phy_driver ksphy_driver[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .suspend = genphy_suspend, @@ -953,8 +936,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9021_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, @@ -973,7 +954,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9031_config_init, - .config_aneg = genphy_config_aneg, .read_status = ksz9031_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, @@ -998,8 +978,6 @@ static struct phy_driver ksphy_driver[] = { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -1019,8 +997,6 @@ static struct phy_driver ksphy_driver[] = { .name = "Microchip KSZ9477", .features = PHY_GBIT_FEATURES, .config_init = kszphy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .suspend = 
genphy_suspend, .resume = genphy_resume, } }; diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 37ee856c7680..0f293ef28935 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -153,7 +153,6 @@ static struct phy_driver microchip_phy_driver[] = { .config_init = genphy_config_init, .config_aneg = lan88xx_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = lan88xx_phy_ack_interrupt, .config_intr = lan88xx_phy_config_intr, diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c index 2addf1d3f619..2b1e336961f9 100644 --- a/drivers/net/phy/national.c +++ b/drivers/net/phy/national.c @@ -136,8 +136,6 @@ static struct phy_driver dp83865_driver[] = { { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = ns_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = ns_ack_interrupt, .config_intr = ns_config_intr, } }; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 2b1e67bc1e73..944143b521d7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -493,7 +493,10 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync) /* Invalidate LP advertising flags */ phydev->lp_advertising = 0; - err = phydev->drv->config_aneg(phydev); + if (phydev->drv->config_aneg) + err = phydev->drv->config_aneg(phydev); + else + err = genphy_config_aneg(phydev); if (err < 0) goto out_unlock; @@ -629,9 +632,6 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (PHY_HALTED == phydev->state) return IRQ_NONE; /* It can't be ours. 
*/ - disable_irq_nosync(irq); - atomic_inc(&phydev->irq_disable); - phy_change(phydev); return IRQ_HANDLED; @@ -689,7 +689,6 @@ phy_err: */ int phy_start_interrupts(struct phy_device *phydev) { - atomic_set(&phydev->irq_disable, 0); if (request_threaded_irq(phydev->irq, NULL, phy_interrupt, IRQF_ONESHOT | IRQF_SHARED, phydev_name(phydev), phydev) < 0) { @@ -716,13 +715,6 @@ int phy_stop_interrupts(struct phy_device *phydev) free_irq(phydev->irq, phydev); - /* If work indeed has been cancelled, disable_irq() will have - * been left unbalanced from phy_interrupt() and enable_irq() - * has to be called so that other devices on the line work. - */ - while (atomic_dec_return(&phydev->irq_disable) >= 0) - enable_irq(phydev->irq); - return err; } EXPORT_SYMBOL(phy_stop_interrupts); @@ -736,10 +728,11 @@ void phy_change(struct phy_device *phydev) if (phy_interrupt_is_valid(phydev)) { if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) - goto ignore; + return; - if (phy_disable_interrupts(phydev)) - goto phy_err; + if (phydev->state == PHY_HALTED) + if (phy_disable_interrupts(phydev)) + goto phy_err; } mutex_lock(&phydev->lock); @@ -747,28 +740,13 @@ void phy_change(struct phy_device *phydev) phydev->state = PHY_CHANGELINK; mutex_unlock(&phydev->lock); - if (phy_interrupt_is_valid(phydev)) { - atomic_dec(&phydev->irq_disable); - enable_irq(phydev->irq); - - /* Reenable interrupts */ - if (PHY_HALTED != phydev->state && - phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED)) - goto irq_enable_err; - } - /* reschedule state queue work to run as soon as possible */ phy_trigger_machine(phydev, true); - return; -ignore: - atomic_dec(&phydev->irq_disable); - enable_irq(phydev->irq); + if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev)) + goto phy_err; return; -irq_enable_err: - disable_irq(phydev->irq); - atomic_inc(&phydev->irq_disable); phy_err: phy_error(phydev); } @@ -1006,10 +984,6 @@ void phy_state_machine(struct work_struct *work) 
phydev->state = PHY_NOLINK; phy_link_down(phydev, true); } - - if (phy_interrupt_is_valid(phydev)) - err = phy_config_interrupt(phydev, - PHY_INTERRUPT_ENABLED); break; case PHY_HALTED: if (phydev->link) { diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 67f25ac29025..8154fb706751 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1907,9 +1907,7 @@ static struct phy_driver genphy_driver = { .features = PHY_GBIT_FEATURES | SUPPORTED_MII | SUPPORTED_AUI | SUPPORTED_FIBRE | SUPPORTED_BNC, - .config_aneg = genphy_config_aneg, .aneg_done = genphy_aneg_done, - .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, .set_loopback = genphy_loopback, diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c index dbef8002bc28..889a4dce1648 100644 --- a/drivers/net/phy/qsemi.c +++ b/drivers/net/phy/qsemi.c @@ -118,8 +118,6 @@ static struct phy_driver qs6612_driver[] = { { .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = qs6612_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = qs6612_ack_interrupt, .config_intr = qs6612_config_intr, } }; diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index eda0a6e86918..7c1bf688dd48 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -13,29 +13,67 @@ * option) any later version. 
* */ +#include <linux/bitops.h> #include <linux/phy.h> #include <linux/module.h> -#define RTL821x_PHYSR 0x11 -#define RTL821x_PHYSR_DUPLEX 0x2000 -#define RTL821x_PHYSR_SPEED 0xc000 -#define RTL821x_INER 0x12 -#define RTL821x_INER_INIT 0x6400 -#define RTL821x_INSR 0x13 -#define RTL821x_PAGE_SELECT 0x1f -#define RTL8211E_INER_LINK_STATUS 0x400 +#define RTL821x_PHYSR 0x11 +#define RTL821x_PHYSR_DUPLEX BIT(13) +#define RTL821x_PHYSR_SPEED GENMASK(15, 14) -#define RTL8211F_INER_LINK_STATUS 0x0010 -#define RTL8211F_INSR 0x1d -#define RTL8211F_TX_DELAY 0x100 +#define RTL821x_INER 0x12 +#define RTL8211B_INER_INIT 0x6400 +#define RTL8211E_INER_LINK_STATUS BIT(10) +#define RTL8211F_INER_LINK_STATUS BIT(4) -#define RTL8201F_ISR 0x1e -#define RTL8201F_IER 0x13 +#define RTL821x_INSR 0x13 + +#define RTL821x_PAGE_SELECT 0x1f + +#define RTL8211F_INSR 0x1d + +#define RTL8211F_TX_DELAY BIT(8) + +#define RTL8201F_ISR 0x1e +#define RTL8201F_IER 0x13 MODULE_DESCRIPTION("Realtek PHY driver"); MODULE_AUTHOR("Johnson Leung"); MODULE_LICENSE("GPL"); +static int rtl8211x_page_read(struct phy_device *phydev, u16 page, u16 address) +{ + int ret; + + ret = phy_write(phydev, RTL821x_PAGE_SELECT, page); + if (ret) + return ret; + + ret = phy_read(phydev, address); + + /* restore to default page 0 */ + phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); + + return ret; +} + +static int rtl8211x_page_write(struct phy_device *phydev, u16 page, + u16 address, u16 val) +{ + int ret; + + ret = phy_write(phydev, RTL821x_PAGE_SELECT, page); + if (ret) + return ret; + + ret = phy_write(phydev, address, val); + + /* restore to default page 0 */ + phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); + + return ret; +} + static int rtl8201_ack_interrupt(struct phy_device *phydev) { int err; @@ -58,31 +96,21 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev) { int err; - phy_write(phydev, RTL821x_PAGE_SELECT, 0xa43); - err = phy_read(phydev, RTL8211F_INSR); - /* restore to default page 0 */ - 
phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); + err = rtl8211x_page_read(phydev, 0xa43, RTL8211F_INSR); return (err < 0) ? err : 0; } static int rtl8201_config_intr(struct phy_device *phydev) { - int err; - - /* switch to page 7 */ - phy_write(phydev, RTL821x_PAGE_SELECT, 0x7); + u16 val; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - err = phy_write(phydev, RTL8201F_IER, - BIT(13) | BIT(12) | BIT(11)); + val = BIT(13) | BIT(12) | BIT(11); else - err = phy_write(phydev, RTL8201F_IER, 0); - - /* restore to default page 0 */ - phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); + val = 0; - return err; + return rtl8211x_page_write(phydev, 0x7, RTL8201F_IER, val); } static int rtl8211b_config_intr(struct phy_device *phydev) @@ -91,7 +119,7 @@ static int rtl8211b_config_intr(struct phy_device *phydev) if (phydev->interrupts == PHY_INTERRUPT_ENABLED) err = phy_write(phydev, RTL821x_INER, - RTL821x_INER_INIT); + RTL8211B_INER_INIT); else err = phy_write(phydev, RTL821x_INER, 0); @@ -113,41 +141,41 @@ static int rtl8211e_config_intr(struct phy_device *phydev) static int rtl8211f_config_intr(struct phy_device *phydev) { - int err; + u16 val; - phy_write(phydev, RTL821x_PAGE_SELECT, 0xa42); if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - err = phy_write(phydev, RTL821x_INER, - RTL8211F_INER_LINK_STATUS); + val = RTL8211F_INER_LINK_STATUS; else - err = phy_write(phydev, RTL821x_INER, 0); - phy_write(phydev, RTL821x_PAGE_SELECT, 0); + val = 0; - return err; + return rtl8211x_page_write(phydev, 0xa42, RTL821x_INER, val); } static int rtl8211f_config_init(struct phy_device *phydev) { int ret; - u16 reg; + u16 val; ret = genphy_config_init(phydev); if (ret < 0) return ret; - phy_write(phydev, RTL821x_PAGE_SELECT, 0xd08); - reg = phy_read(phydev, 0x11); + ret = rtl8211x_page_read(phydev, 0xd08, 0x11); + if (ret < 0) + return ret; + + val = ret & 0xffff; /* enable TX-delay for rgmii-id and rgmii-txid, otherwise disable it */ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || 
phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) - reg |= RTL8211F_TX_DELAY; + val |= RTL8211F_TX_DELAY; else - reg &= ~RTL8211F_TX_DELAY; + val &= ~RTL8211F_TX_DELAY; - phy_write(phydev, 0x11, reg); - /* restore to default page 0 */ - phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); + ret = rtl8211x_page_write(phydev, 0xd08, 0x11, val); + if (ret) + return ret; return 0; } @@ -159,16 +187,12 @@ static struct phy_driver realtek_drvs[] = { .phy_id_mask = 0x0000ffff, .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, }, { .phy_id = 0x001cc816, .name = "RTL8201F 10/100Mbps Ethernet", .phy_id_mask = 0x001fffff, .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &rtl8201_ack_interrupt, .config_intr = &rtl8201_config_intr, .suspend = genphy_suspend, @@ -179,8 +203,6 @@ static struct phy_driver realtek_drvs[] = { .phy_id_mask = 0x001fffff, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211b_config_intr, }, { @@ -189,8 +211,6 @@ static struct phy_driver realtek_drvs[] = { .phy_id_mask = 0x001fffff, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = rtl821x_ack_interrupt, .config_intr = rtl8211e_config_intr, .suspend = genphy_suspend, @@ -201,8 +221,6 @@ static struct phy_driver realtek_drvs[] = { .phy_id_mask = 0x001fffff, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211e_config_intr, .suspend = genphy_suspend, @@ -213,9 +231,7 @@ static struct phy_driver realtek_drvs[] = { .phy_id_mask = 
0x001fffff, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = &genphy_config_aneg, .config_init = &rtl8211f_config_init, - .read_status = &genphy_read_status, .ack_interrupt = &rtl8211f_ack_interrupt, .config_intr = &rtl8211f_config_intr, .suspend = genphy_suspend, diff --git a/drivers/net/phy/rockchip.c b/drivers/net/phy/rockchip.c index c092af137056..f1da70b9b55f 100644 --- a/drivers/net/phy/rockchip.c +++ b/drivers/net/phy/rockchip.c @@ -213,7 +213,6 @@ static struct phy_driver rockchip_phy_driver[] = { .soft_reset = genphy_soft_reset, .config_init = rockchip_integrated_phy_config_init, .config_aneg = rockchip_config_aneg, - .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = rockchip_phy_resume, }, diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 2306bfae057f..a1961ba87e2b 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -227,8 +227,6 @@ static struct phy_driver smsc_phy_driver[] = { .probe = smsc_phy_probe, /* basic functions */ - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .config_init = smsc_phy_config_init, .soft_reset = smsc_phy_reset, @@ -249,8 +247,6 @@ static struct phy_driver smsc_phy_driver[] = { .probe = smsc_phy_probe, /* basic functions */ - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .config_init = smsc_phy_config_init, .soft_reset = smsc_phy_reset, @@ -276,7 +272,6 @@ static struct phy_driver smsc_phy_driver[] = { .probe = smsc_phy_probe, /* basic functions */ - .config_aneg = genphy_config_aneg, .read_status = lan87xx_read_status, .config_init = smsc_phy_config_init, .soft_reset = smsc_phy_reset, @@ -303,8 +298,6 @@ static struct phy_driver smsc_phy_driver[] = { .probe = smsc_phy_probe, /* basic functions */ - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .config_init = lan911x_config_init, /* IRQ related */ @@ -324,7 +317,6 @@ static struct phy_driver smsc_phy_driver[] = { .probe = 
smsc_phy_probe, /* basic functions */ - .config_aneg = genphy_config_aneg, .read_status = lan87xx_read_status, .config_init = smsc_phy_config_init, .soft_reset = smsc_phy_reset, @@ -351,7 +343,6 @@ static struct phy_driver smsc_phy_driver[] = { .probe = smsc_phy_probe, /* basic functions */ - .config_aneg = genphy_config_aneg, .read_status = lan87xx_read_status, .config_init = smsc_phy_config_init, .soft_reset = smsc_phy_reset, diff --git a/drivers/net/phy/ste10Xp.c b/drivers/net/phy/ste10Xp.c index d00cfb64529e..fbd548a1ad84 100644 --- a/drivers/net/phy/ste10Xp.c +++ b/drivers/net/phy/ste10Xp.c @@ -89,8 +89,6 @@ static struct phy_driver ste10xp_pdriver[] = { .features = PHY_BASIC_FEATURES | SUPPORTED_Pause, .flags = PHY_HAS_INTERRUPT, .config_init = ste10Xp_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = ste10Xp_ack_interrupt, .config_intr = ste10Xp_config_intr, .suspend = genphy_suspend, @@ -102,8 +100,6 @@ static struct phy_driver ste10xp_pdriver[] = { .features = PHY_BASIC_FEATURES | SUPPORTED_Pause, .flags = PHY_HAS_INTERRUPT, .config_init = ste10Xp_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, .ack_interrupt = ste10Xp_ack_interrupt, .config_intr = ste10Xp_config_intr, .suspend = genphy_suspend, diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c index 96b33475ea5e..55f48ee3595a 100644 --- a/drivers/net/phy/uPD60620.c +++ b/drivers/net/phy/uPD60620.c @@ -95,7 +95,6 @@ static struct phy_driver upd60620_driver[1] = { { .features = PHY_BASIC_FEATURES, .flags = 0, .config_init = upd60620_config_init, - .config_aneg = genphy_config_aneg, .read_status = upd60620_read_status, } }; diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index f78ff0279648..d9dd8fbfffc7 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -267,7 +267,6 @@ static struct phy_driver vsc82xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = 
&vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, }, { @@ -278,7 +277,6 @@ static struct phy_driver vsc82xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, }, { @@ -289,7 +287,6 @@ static struct phy_driver vsc82xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, }, { @@ -300,7 +297,6 @@ static struct phy_driver vsc82xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, }, { @@ -311,7 +307,6 @@ static struct phy_driver vsc82xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, }, { @@ -321,8 +316,6 @@ static struct phy_driver vsc82xx_driver[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &vsc8601_config_init, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, }, { @@ -333,7 +326,6 @@ static struct phy_driver vsc82xx_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, }, { @@ -344,8 +336,6 @@ static struct phy_driver 
vsc82xx_driver[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &vsc8221_config_init, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, }, { @@ -356,8 +346,6 @@ static struct phy_driver vsc82xx_driver[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &vsc8221_config_init, - .config_aneg = &genphy_config_aneg, - .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, } }; diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 9c51b8be0038..5ba222920e80 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.a.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.11.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040a00 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040b00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. 
*/ @@ -416,8 +416,8 @@ struct vmxnet3_adapter { /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 -#define VMXNET3_DEF_RX_RING_SIZE 256 -#define VMXNET3_DEF_RX_RING2_SIZE 128 +#define VMXNET3_DEF_RX_RING_SIZE 1024 +#define VMXNET3_DEF_RX_RING2_SIZE 256 #define VMXNET3_DEF_RXDATA_DESC_SIZE 128 diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f3e97c5f94c9..5f8bd0cebddf 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -127,28 +127,6 @@ struct hv_ring_buffer_info { u32 priv_read_index; }; -/* - * - * hv_get_ringbuffer_availbytes() - * - * Get number of bytes available to read and to write to - * for the specified ring buffer - */ -static inline void -hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, - u32 *read, u32 *write) -{ - u32 read_loc, write_loc, dsize; - - /* Capture the read/write indices before they changed */ - read_loc = rbi->ring_buffer->read_index; - write_loc = rbi->ring_buffer->write_index; - dsize = rbi->ring_datasize; - - *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : - read_loc - write_loc; - *read = dsize - *write; -} static inline u32 hv_get_bytes_to_read(const struct hv_ring_buffer_info *rbi) { diff --git a/include/linux/phy.h b/include/linux/phy.h index e00fd9ce3bce..50030da01664 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -468,7 +468,6 @@ struct phy_device { /* Interrupt and Polling infrastructure */ struct work_struct phy_queue; struct delayed_work state_queue; - atomic_t irq_disable; struct mutex lock; @@ -497,13 +496,13 @@ struct phy_device { * flags: A bitfield defining certain other features this PHY * supports (like interrupts) * - * The drivers must implement config_aneg and read_status. All - * other functions are optional. Note that none of these - * functions should be called from interrupt time. 
The goal is - * for the bus read/write functions to be able to block when the - * bus transaction is happening, and be freed up by an interrupt - * (The MPC85xx has this ability, though it is not currently - * supported in the driver). + * All functions are optional. If config_aneg or read_status + * are not implemented, the phy core uses the genphy versions. + * Note that none of these functions should be called from + * interrupt time. The goal is for the bus read/write functions + * to be able to block when the bus transaction is happening, + * and be freed up by an interrupt (The MPC85xx has this ability, + * though it is not currently supported in the driver). */ struct phy_driver { struct mdio_driver_common mdiodrv; @@ -841,14 +840,6 @@ int phy_aneg_done(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_restart_aneg(struct phy_device *phydev); -static inline int phy_read_status(struct phy_device *phydev) -{ - if (!phydev->drv) - return -EIO; - - return phydev->drv->read_status(phydev); -} - #define phydev_err(_phydev, format, args...) 
\ dev_err(&_phydev->mdio.dev, format, ##args) @@ -890,6 +881,17 @@ int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); +static inline int phy_read_status(struct phy_device *phydev) +{ + if (!phydev->drv) + return -EIO; + + if (phydev->drv->read_status) + return phydev->drv->read_status(phydev); + else + return genphy_read_status(phydev); +} + void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); diff --git a/include/net/dsa.h b/include/net/dsa.h index 2a05738570d8..8198efcc8ced 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,20 +296,23 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds) return mask; } +/* Return the local port used to reach an arbitrary switch port */ +static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device, + int port) +{ + if (device == ds->index) + return port; + else + return ds->rtable[device]; +} + +/* Return the local port used to reach the dedicated CPU port */ static inline u8 dsa_upstream_port(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; + struct dsa_port *cpu_dp = dst->cpu_dp; - /* - * If this is the root switch (i.e. the switch that connects - * to the CPU), return the cpu port number on this switch. - * Else return the (DSA) port number that connects to the - * switch that is one hop closer to the cpu. 
- */ - if (dst->cpu_dp->ds == ds) - return dst->cpu_dp->index; - else - return ds->rtable[dst->cpu_dp->ds->index]; + return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index); } typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, @@ -412,12 +415,10 @@ struct dsa_switch_ops { */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, bool vlan_filtering); - int (*port_vlan_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); - void (*port_vlan_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + int (*port_vlan_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); + void (*port_vlan_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); /* @@ -433,12 +434,10 @@ struct dsa_switch_ops { /* * Multicast database */ - int (*port_mdb_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); - void (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); + int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); + void (*port_mdb_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); /* diff --git a/include/net/erspan.h b/include/net/erspan.h index ca94fc86865e..6e758d08c9ee 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -58,4 +58,55 @@ struct erspanhdr { struct erspan_metadata md; }; +static inline u8 tos_to_cos(u8 tos) +{ + u8 dscp, cos; + + dscp = tos >> 2; + cos = dscp >> 3; + return cos; +} + +static inline void erspan_build_header(struct 
sk_buff *skb, + __be32 id, u32 index, + bool truncate, bool is_ipv4) +{ + struct ethhdr *eth = eth_hdr(skb); + enum erspan_encap_type enc_type; + struct erspanhdr *ershdr; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + u8 tos; + + tos = is_ipv4 ? ip_hdr(skb)->tos : + (ipv6_hdr(skb)->priority << 4) + + (ipv6_hdr(skb)->flow_lbl[0] >> 4); + + enc_type = ERSPAN_ENCAP_NOVLAN; + + /* If mirrored packet has vlan tag, extract tci and + * perserve vlan header in the mirrored frame. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + enc_type = ERSPAN_ENCAP_INFRAME; + } + + skb_push(skb, sizeof(*ershdr)); + ershdr = (struct erspanhdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr)); + + ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) | + (ERSPAN_VERSION << VER_OFFSET)); + ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) | + ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) | + (enc_type << EN_OFFSET & EN_MASK) | + ((truncate << T_OFFSET) & T_MASK)); + ershdr->md.index = htonl(index & INDEX_MASK); +} + #endif diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 0358745ea059..8e1bf9ae4a5e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops { * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -101,6 +102,7 @@ struct inet_connection_sock { const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct 
sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 2dbbbff5e1e3..9141e95529e7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -111,6 +111,7 @@ struct inet_bind_hashbucket { */ struct inet_listen_hashbucket { spinlock_t lock; + unsigned int count; struct hlist_head head; }; @@ -132,12 +133,13 @@ struct inet_hashinfo { /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ + struct kmem_cache *bind_bucket_cachep; struct inet_bind_hashbucket *bhash; - unsigned int bhash_size; - /* 4 bytes hole on 64 bit */ - struct kmem_cache *bind_bucket_cachep; + /* The 2nd listener table hashed by local port and address */ + unsigned int lhash2_mask; + struct inet_listen_hashbucket *lhash2; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -145,14 +147,25 @@ struct inet_hashinfo { * Now align to a new cache line as all the following members * might be often dirty. */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. + /* All sockets in TCP_LISTEN state will be in listening_hash. + * This is the only table where wildcard'd TCP sockets can + * exist. listening_hash is only hashed by local port number. + * If lhash2 is initialized, the same socket will also be hashed + * to lhash2 by port and address. 
*/ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; }; +#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ + hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) + +static inline struct inet_listen_hashbucket * +inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) +{ + return &h->lhash2[hash & h->lhash2_mask]; +} + static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) @@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); void inet_hashinfo_init(struct inet_hashinfo *h); +void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + unsigned long numentries, int scale, + unsigned long low_limit, + unsigned long high_limit); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); diff --git a/include/net/ip.h b/include/net/ip.h index 9896f46cbbf1..fc9bf1b1fe2c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -26,12 +26,14 @@ #include <linux/ip.h> #include <linux/in.h> #include <linux/skbuff.h> +#include <linux/jhash.h> #include <net/inet_sock.h> #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/netns/hash.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ @@ -521,6 +523,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip) return (__force unsigned int) ip; } +static inline u32 ipv4_portaddr_hash(const struct net *net, + __be32 saddr, + unsigned int port) +{ + return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; +} + bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index d66f70f63734..109a5a8877ef 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -36,6 +36,7 @@ struct __ip6_tnl_parm { __be32 o_key; __u32 fwmark; + __u32 index; /* 
ERSPAN type II index */ }; /* IPv6 tunnel */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f73797e2fa60..25be4715578c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -22,6 +22,7 @@ #include <net/flow.h> #include <net/flow_dissector.h> #include <net/snmp.h> +#include <net/netns/hash.h> #define SIN6_LEN_RFC2133 24 @@ -673,6 +674,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } +static inline u32 ipv6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) +{ + unsigned int hash, mix = net_hash_mix(net); + + if (ipv6_addr_any(addr6)) + hash = jhash_1word(0, mix); + else if (ipv6_addr_v4mapped(addr6)) + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); + else + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); + + return hash ^ port; +} + /* * Check for a RFC 4843 ORCHID address * (Overlay Routable Cryptographic Hash Identifiers) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index ead018744ff5..14b6b3af8918 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -13,10 +13,10 @@ enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = 1, }; -int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); +int rtnl_register_module(struct module *owner, int protocol, int msgtype, + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index ac71559314e7..44a0b675a6bc 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1686,6 +1686,7 @@ enum ethtool_reset_flags { ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */ ETH_RESET_RAM = 1 << 7, /* RAM shared between * multiple components */ + ETH_RESET_AP = 
1 << 8, /* Application processor */ ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to * this interface */ diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index b0f4c734900b..6d9f48bd374a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -760,9 +760,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void br_mdb_init(void) { - rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); - rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); } void br_mdb_uninit(void) diff --git a/net/can/gw.c b/net/can/gw.c index 73a02af4b5d7..398dd0395ad9 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1014,6 +1014,8 @@ static struct pernet_operations cangw_pernet_ops = { static __init int cgw_module_init(void) { + int ret; + /* sanitize given module parameter */ max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS); @@ -1031,15 +1033,19 @@ static __init int cgw_module_init(void) notifier.notifier_call = cgw_notifier; register_netdevice_notifier(&notifier); - if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) { + ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE, + NULL, cgw_dump_jobs, 0); + if (ret) { unregister_netdevice_notifier(&notifier); kmem_cache_destroy(cgw_cache); return -ENOBUFS; } - /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0); - __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0); + /* Only the first call to rtnl_register_module can fail */ + rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, + cgw_create_job, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, + 
cgw_remove_job, NULL, 0); return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index 6a85e67fafce..8ec5a504eb28 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3026,10 +3026,11 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, IPV6_FLOWLABEL_MASK; } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); - if (flags & BPF_F_ZERO_CSUM_TX) - info->key.tun_flags &= ~TUNNEL_CSUM; } + if (flags & BPF_F_ZERO_CSUM_TX) + info->key.tun_flags &= ~TUNNEL_CSUM; + return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9c4cb584bfb0..a55d0c236b40 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -62,7 +62,9 @@ struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; + struct module *owner; unsigned int flags; + struct rcu_head rcu; }; static DEFINE_MUTEX(rtnl_mutex); @@ -127,8 +129,7 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; -static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1]; +static struct rtnl_link __rcu **rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -144,70 +145,125 @@ static inline int rtm_msgindex(int msgtype) return msgindex; } -/** - * __rtnl_register - Register a rtnetlink message type - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions - * - * Registers the specified function pointers (at least one of them has - * to be non-NULL) to be called whenever a request message for the - * specified protocol family and message type is received. 
- * - * The special protocol family PF_UNSPEC may be used to define fallback - * function pointers for the case when no entry for the specific protocol - * family exists. - * - * Returns 0 on success or a negative error code. - */ -int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) +static struct rtnl_link *rtnl_get_link(int protocol, int msgtype) +{ + struct rtnl_link **tab; + + if (protocol >= ARRAY_SIZE(rtnl_msg_handlers)) + protocol = PF_UNSPEC; + + tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]); + if (!tab) + tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]); + + return tab[msgtype]; +} + +static int rtnl_register_internal(struct module *owner, + int protocol, int msgtype, + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + unsigned int flags) { - struct rtnl_link *tab; + struct rtnl_link **tab, *link, *old; int msgindex; + int ret = -ENOBUFS; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); - tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]); + rtnl_lock(); + tab = rtnl_msg_handlers[protocol]; if (tab == NULL) { - tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL); - if (tab == NULL) - return -ENOBUFS; + tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL); + if (!tab) + goto unlock; + /* ensures we see the 0 stores */ rcu_assign_pointer(rtnl_msg_handlers[protocol], tab); } + old = rtnl_dereference(tab[msgindex]); + if (old) { + link = kmemdup(old, sizeof(*old), GFP_KERNEL); + if (!link) + goto unlock; + } else { + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + goto unlock; + } + + WARN_ON(link->owner && link->owner != owner); + link->owner = owner; + + WARN_ON(doit && link->doit && link->doit != doit); if (doit) - tab[msgindex].doit = doit; + link->doit = doit; + WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit); if (dumpit) - tab[msgindex].dumpit = dumpit; - tab[msgindex].flags |= flags; + link->dumpit = 
dumpit; - return 0; + link->flags |= flags; + + /* publish protocol:msgtype */ + rcu_assign_pointer(tab[msgindex], link); + ret = 0; + if (old) + kfree_rcu(old, rcu); +unlock: + rtnl_unlock(); + return ret; } -EXPORT_SYMBOL_GPL(__rtnl_register); + +/** + * rtnl_register_module - Register a rtnetlink message type + * + * @owner: module registering the hook (THIS_MODULE) + * @protocol: Protocol family or PF_UNSPEC + * @msgtype: rtnetlink message type + * @doit: Function pointer called for each request message + * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions + * + * Like rtnl_register, but for use by removable modules. + */ +int rtnl_register_module(struct module *owner, + int protocol, int msgtype, + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + unsigned int flags) +{ + return rtnl_register_internal(owner, protocol, msgtype, + doit, dumpit, flags); +} +EXPORT_SYMBOL_GPL(rtnl_register_module); /** * rtnl_register - Register a rtnetlink message type + * @protocol: Protocol family or PF_UNSPEC + * @msgtype: rtnetlink message type + * @doit: Function pointer called for each request message + * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions + * + * Registers the specified function pointers (at least one of them has + * to be non-NULL) to be called whenever a request message for the + * specified protocol family and message type is received. * - * Identical to __rtnl_register() but panics on failure. This is useful - * as failure of this function is very unlikely, it can only happen due - * to lack of memory when allocating the chain to store all message - * handlers for a protocol. Meant for use in init functions where lack - * of memory implies no sense in continuing. 
+ * The special protocol family PF_UNSPEC may be used to define fallback + * function pointers for the case when no entry for the specific protocol + * family exists. */ void rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, unsigned int flags) { - if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0) - panic("Unable to register rtnetlink message handler, " - "protocol = %d, message type = %d\n", - protocol, msgtype); + int err; + + err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit, + flags); + if (err) + pr_err("Unable to register rtnetlink message handler, " + "protocol = %d, message type = %d\n", protocol, msgtype); } EXPORT_SYMBOL_GPL(rtnl_register); @@ -220,24 +276,25 @@ EXPORT_SYMBOL_GPL(rtnl_register); */ int rtnl_unregister(int protocol, int msgtype) { - struct rtnl_link *handlers; + struct rtnl_link **tab, *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); rtnl_lock(); - handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); - if (!handlers) { + tab = rtnl_dereference(rtnl_msg_handlers[protocol]); + if (!tab) { rtnl_unlock(); return -ENOENT; } - handlers[msgindex].doit = NULL; - handlers[msgindex].dumpit = NULL; - handlers[msgindex].flags = 0; + link = tab[msgindex]; + rcu_assign_pointer(tab[msgindex], NULL); rtnl_unlock(); + kfree_rcu(link, rcu); + return 0; } EXPORT_SYMBOL_GPL(rtnl_unregister); @@ -251,20 +308,27 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); */ void rtnl_unregister_all(int protocol) { - struct rtnl_link *handlers; + struct rtnl_link **tab, *link; + int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); rtnl_lock(); - handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); + tab = rtnl_msg_handlers[protocol]; RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); + for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) { + link = tab[msgindex]; + if (!link) + continue; + + rcu_assign_pointer(tab[msgindex], 
NULL); + kfree_rcu(link, rcu); + } rtnl_unlock(); synchronize_net(); - while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1) - schedule(); - kfree(handlers); + kfree(tab); } EXPORT_SYMBOL_GPL(rtnl_unregister_all); @@ -2977,18 +3041,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) s_idx = 1; for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { + struct rtnl_link **tab; int type = cb->nlh->nlmsg_type-RTM_BASE; - struct rtnl_link *handlers; + struct rtnl_link *link; rtnl_dumpit_func dumpit; if (idx < s_idx || idx == PF_PACKET) continue; - handlers = rtnl_dereference(rtnl_msg_handlers[idx]); - if (!handlers) + if (type < 0 || type >= RTM_NR_MSGTYPES) continue; - dumpit = READ_ONCE(handlers[type].dumpit); + tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]); + if (!tab) + continue; + + link = tab[type]; + if (!link) + continue; + + dumpit = link->dumpit; if (!dumpit) continue; @@ -4318,7 +4390,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct rtnl_link *handlers; + struct rtnl_link *link; + struct module *owner; int err = -EOPNOTSUPP; rtnl_doit_func doit; unsigned int flags; @@ -4342,79 +4415,85 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (family >= ARRAY_SIZE(rtnl_msg_handlers)) - family = PF_UNSPEC; - rcu_read_lock(); - handlers = rcu_dereference(rtnl_msg_handlers[family]); - if (!handlers) { - family = PF_UNSPEC; - handlers = rcu_dereference(rtnl_msg_handlers[family]); - } - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; u16 min_dump_alloc = 0; - dumpit = READ_ONCE(handlers[type].dumpit); - if (!dumpit) { + link = rtnl_get_link(family, type); + if (!link || !link->dumpit) { family = PF_UNSPEC; - handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]); - if (!handlers) - goto 
err_unlock; - - dumpit = READ_ONCE(handlers[type].dumpit); - if (!dumpit) + link = rtnl_get_link(family, type); + if (!link || !link->dumpit) goto err_unlock; } - - refcount_inc(&rtnl_msg_handlers_ref[family]); + owner = link->owner; + dumpit = link->dumpit; if (type == RTM_GETLINK - RTM_BASE) min_dump_alloc = rtnl_calcit(skb, nlh); + err = 0; + /* need to do this before rcu_read_unlock() */ + if (!try_module_get(owner)) + err = -EPROTONOSUPPORT; + rcu_read_unlock(); rtnl = net->rtnl; - { + if (err == 0) { struct netlink_dump_control c = { .dump = dumpit, .min_dump_alloc = min_dump_alloc, + .module = owner, }; err = netlink_dump_start(rtnl, skb, nlh, &c); + /* netlink_dump_start() will keep a reference on + * module if dump is still in progress. + */ + module_put(owner); } - refcount_dec(&rtnl_msg_handlers_ref[family]); return err; } - doit = READ_ONCE(handlers[type].doit); - if (!doit) { + link = rtnl_get_link(family, type); + if (!link || !link->doit) { family = PF_UNSPEC; - handlers = rcu_dereference(rtnl_msg_handlers[family]); + link = rtnl_get_link(PF_UNSPEC, type); + if (!link || !link->doit) + goto out_unlock; + } + + owner = link->owner; + if (!try_module_get(owner)) { + err = -EPROTONOSUPPORT; + goto out_unlock; } - flags = READ_ONCE(handlers[type].flags); + flags = link->flags; if (flags & RTNL_FLAG_DOIT_UNLOCKED) { - refcount_inc(&rtnl_msg_handlers_ref[family]); - doit = READ_ONCE(handlers[type].doit); + doit = link->doit; rcu_read_unlock(); if (doit) err = doit(skb, nlh, extack); - refcount_dec(&rtnl_msg_handlers_ref[family]); + module_put(owner); return err; } - rcu_read_unlock(); rtnl_lock(); - handlers = rtnl_dereference(rtnl_msg_handlers[family]); - if (handlers) { - doit = READ_ONCE(handlers[type].doit); - if (doit) - err = doit(skb, nlh, extack); - } + link = rtnl_get_link(family, type); + if (link && link->doit) + err = link->doit(skb, nlh, extack); rtnl_unlock(); + + module_put(owner); + + return err; + +out_unlock: + rcu_read_unlock(); return 
err; err_unlock: @@ -4502,11 +4581,6 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++) - refcount_set(&rtnl_msg_handlers_ref[i], 1); - if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 9153247dad28..d1885cf59319 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1418,9 +1418,12 @@ void __init dn_dev_init(void) dn_dev_devices_on(); - rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0); - rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0); - rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR, + dn_nl_newaddr, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR, + dn_nl_deladdr, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR, + NULL, dn_nl_dump_ifaddr, 0); proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index b37a1b833c77..fce94cbd4378 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -792,8 +792,10 @@ void __init dn_fib_init(void) register_dnaddr_notifier(&dn_fib_dnaddr_notifier); - rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0); - rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE, + dn_fib_rtm_newroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE, + dn_fib_rtm_delroute, NULL, 0); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 4b3ca70be723..73160d4aebbe 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1923,11 +1923,11 @@ void __init dn_route_init(void) &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER - rtnl_register(PF_DECnet, RTM_GETROUTE, 
dn_cache_getroute, - dn_fib_dump, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, + dn_cache_getroute, dn_fib_dump, 0); #else - rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_cache_dump, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, + dn_cache_getroute, dn_cache_dump, 0); #endif } diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 29608d087a7c..b93511726069 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -83,29 +83,52 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - /* Do not care yet about other switch chips of the fabric */ - if (ds->index != info->sw_index) - return 0; + int port = dsa_towards_port(ds, info->sw_index, info->port); if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - return ds->ops->port_fdb_add(ds, info->port, info->addr, - info->vid); + return ds->ops->port_fdb_add(ds, port, info->addr, info->vid); } static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - /* Do not care yet about other switch chips of the fabric */ - if (ds->index != info->sw_index) - return 0; + int port = dsa_towards_port(ds, info->sw_index, info->port); if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return ds->ops->port_fdb_del(ds, info->port, info->addr, - info->vid); + return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); +} + +static int +dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds, + const struct switchdev_obj_port_mdb *mdb, + const unsigned long *bitmap) +{ + int port, err; + + if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + for_each_set_bit(port, bitmap, ds->num_ports) { + err = ds->ops->port_mdb_prepare(ds, port, mdb); + if (err) + return err; + } + + return 0; +} + +static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds, + const struct switchdev_obj_port_mdb *mdb, + const unsigned long *bitmap) +{ + int 
port; + + for_each_set_bit(port, bitmap, ds->num_ports) + ds->ops->port_mdb_add(ds, port, mdb); } static int dsa_switch_mdb_add(struct dsa_switch *ds, @@ -114,7 +137,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, const struct switchdev_obj_port_mdb *mdb = info->mdb; struct switchdev_trans *trans = info->trans; DECLARE_BITMAP(group, ds->num_ports); - int port, err; + int port; /* Build a mask of Multicast group members */ bitmap_zero(group, ds->num_ports); @@ -124,21 +147,10 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, if (dsa_is_dsa_port(ds, port)) set_bit(port, group); - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) - return -EOPNOTSUPP; - - for_each_set_bit(port, group, ds->num_ports) { - err = ds->ops->port_mdb_prepare(ds, port, mdb, trans); - if (err) - return err; - } - - return 0; - } + if (switchdev_trans_ph_prepare(trans)) + return dsa_switch_mdb_prepare_bitmap(ds, mdb, group); - for_each_set_bit(port, group, ds->num_ports) - ds->ops->port_mdb_add(ds, port, mdb, trans); + dsa_switch_mdb_add_bitmap(ds, mdb, group); return 0; } @@ -157,13 +169,43 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds, return 0; } +static int +dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds, + const struct switchdev_obj_port_vlan *vlan, + const unsigned long *bitmap) +{ + int port, err; + + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) + return -EOPNOTSUPP; + + for_each_set_bit(port, bitmap, ds->num_ports) { + err = ds->ops->port_vlan_prepare(ds, port, vlan); + if (err) + return err; + } + + return 0; +} + +static void +dsa_switch_vlan_add_bitmap(struct dsa_switch *ds, + const struct switchdev_obj_port_vlan *vlan, + const unsigned long *bitmap) +{ + int port; + + for_each_set_bit(port, bitmap, ds->num_ports) + ds->ops->port_vlan_add(ds, port, vlan); +} + static int dsa_switch_vlan_add(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { const struct switchdev_obj_port_vlan 
*vlan = info->vlan; struct switchdev_trans *trans = info->trans; DECLARE_BITMAP(members, ds->num_ports); - int port, err; + int port; /* Build a mask of VLAN members */ bitmap_zero(members, ds->num_ports); @@ -173,21 +215,10 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds, if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) set_bit(port, members); - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) - return -EOPNOTSUPP; - - for_each_set_bit(port, members, ds->num_ports) { - err = ds->ops->port_vlan_prepare(ds, port, vlan, trans); - if (err) - return err; - } - - return 0; - } + if (switchdev_trans_ph_prepare(trans)) + return dsa_switch_vlan_prepare_bitmap(ds, vlan, members); - for_each_set_bit(port, members, ds->num_ports) - ds->ops->port_vlan_add(ds, port, vlan, trans); + dsa_switch_vlan_add_bitmap(ds, vlan, members); return 0; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 427b705d7c64..f6f58108b4c5 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/wait.h> #include <linux/vmalloc.h> +#include <linux/bootmem.h> #include <net/addrconf.h> #include <net/inet_connection_sock.h> @@ -168,6 +169,60 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) } EXPORT_SYMBOL_GPL(__inet_inherit_port); +static struct inet_listen_hashbucket * +inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk) +{ + u32 hash; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + hash = ipv6_portaddr_hash(sock_net(sk), + &sk->sk_v6_rcv_saddr, + inet_sk(sk)->inet_num); + else +#endif + hash = ipv4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + return inet_lhash2_bucket(h, hash); +} + +static void inet_hash2(struct inet_hashinfo *h, struct sock *sk) +{ + struct inet_listen_hashbucket *ilb2; + + if (!h->lhash2) + return; + + ilb2 = 
inet_lhash2_bucket_sk(h, sk); + + spin_lock(&ilb2->lock); + if (sk->sk_reuseport && sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, + &ilb2->head); + else + hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, + &ilb2->head); + ilb2->count++; + spin_unlock(&ilb2->lock); +} + +static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk) +{ + struct inet_listen_hashbucket *ilb2; + + if (!h->lhash2 || + WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node))) + return; + + ilb2 = inet_lhash2_bucket_sk(h, sk); + + spin_lock(&ilb2->lock); + hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node); + ilb2->count--; + spin_unlock(&ilb2->lock); +} + static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, const int dif, const int sdif, bool exact_dif) @@ -207,6 +262,40 @@ static inline int compute_score(struct sock *sk, struct net *net, */ /* called with rcu_read_lock() : No refcount taken on the socket */ +static struct sock *inet_lhash2_lookup(struct net *net, + struct inet_listen_hashbucket *ilb2, + struct sk_buff *skb, int doff, + const __be32 saddr, __be16 sport, + const __be32 daddr, const unsigned short hnum, + const int dif, const int sdif) +{ + bool exact_dif = inet_exact_dif_match(net, skb); + struct inet_connection_sock *icsk; + struct sock *sk, *result = NULL; + int score, hiscore = 0; + u32 phash = 0; + + inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { + sk = (struct sock *)icsk; + score = compute_score(sk, net, hnum, daddr, + dif, sdif, exact_dif); + if (score > hiscore) { + if (sk->sk_reuseport) { + phash = inet_ehashfn(net, daddr, hnum, + saddr, sport); + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; + } + result = sk; + hiscore = score; + } + } + + return result; +} + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ 
-217,10 +306,42 @@ struct sock *__inet_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; bool exact_dif = inet_exact_dif_match(net, skb); + struct inet_listen_hashbucket *ilb2; struct sock *sk, *result = NULL; int score, hiscore = 0; + unsigned int hash2; u32 phash = 0; + if (ilb->count <= 10 || !hashinfo->lhash2) + goto port_lookup; + + /* Too many sk in the ilb bucket (which is hashed by port alone). + * Try lhash2 (which is hashed by port and addr) instead. + */ + + hash2 = ipv4_portaddr_hash(net, daddr, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + result = inet_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + if (result) + return result; + + /* Lookup lhash2 with INADDR_ANY */ + + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + return inet_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + +port_lookup: sk_for_each_rcu(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); @@ -476,6 +597,8 @@ int __inet_hash(struct sock *sk, struct sock *osk) hlist_add_tail_rcu(&sk->sk_node, &ilb->head); else hlist_add_head_rcu(&sk->sk_node, &ilb->head); + inet_hash2(hashinfo, sk); + ilb->count++; sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: @@ -502,28 +625,35 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_listen_hashbucket *ilb; spinlock_t *lock; bool listener = false; - int done; if (sk_unhashed(sk)) return; if (sk->sk_state == TCP_LISTEN) { - lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = 
&ilb->lock; listener = true; } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); } spin_lock_bh(lock); + if (sk_unhashed(sk)) + goto unlock; + if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - if (listener) - done = __sk_del_node_init(sk); - else - done = __sk_nulls_del_node_init_rcu(sk); - if (done) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + if (listener) { + inet_unhash2(hashinfo, sk); + __sk_del_node_init(sk); + ilb->count--; + } else { + __sk_nulls_del_node_init_rcu(sk); + } + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); +unlock: spin_unlock_bh(lock); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -658,10 +788,37 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); INIT_HLIST_HEAD(&h->listening_hash[i].head); + h->listening_hash[i].count = 0; } + + h->lhash2 = NULL; } EXPORT_SYMBOL_GPL(inet_hashinfo_init); +void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + unsigned long numentries, int scale, + unsigned long low_limit, + unsigned long high_limit) +{ + unsigned int i; + + h->lhash2 = alloc_large_system_hash(name, + sizeof(*h->lhash2), + numentries, + scale, + 0, + NULL, + &h->lhash2_mask, + low_limit, + high_limit); + + for (i = 0; i <= h->lhash2_mask; i++) { + spin_lock_init(&h->lhash2[i].lock); + INIT_HLIST_HEAD(&h->lhash2[i].head); + h->lhash2[i].count = 0; + } +} + int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index bb6239169b1a..d828821d88d7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -114,7 +114,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void erspan_build_header(struct sk_buff *skb, - __be32 id, u32 index, bool truncate); + __be32 id, 
u32 index, + bool truncate, bool is_ipv4); static unsigned int ipgre_net_id __read_mostly; static unsigned int gre_tap_net_id __read_mostly; @@ -589,7 +590,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, goto err_free_rt; erspan_build_header(skb, tunnel_id_to_key32(key->tun_id), - ntohl(md->index), truncate); + ntohl(md->index), truncate, true); gre_build_header(skb, 8, TUNNEL_SEQ, htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); @@ -668,52 +669,6 @@ free_skb: return NETDEV_TX_OK; } -static inline u8 tos_to_cos(u8 tos) -{ - u8 dscp, cos; - - dscp = tos >> 2; - cos = dscp >> 3; - return cos; -} - -static void erspan_build_header(struct sk_buff *skb, - __be32 id, u32 index, bool truncate) -{ - struct iphdr *iphdr = ip_hdr(skb); - struct ethhdr *eth = eth_hdr(skb); - enum erspan_encap_type enc_type; - struct erspanhdr *ershdr; - struct qtag_prefix { - __be16 eth_type; - __be16 tci; - } *qp; - u16 vlan_tci = 0; - - enc_type = ERSPAN_ENCAP_NOVLAN; - - /* If mirrored packet has vlan tag, extract tci and - * perserve vlan header in the mirrored frame. 
- */ - if (eth->h_proto == htons(ETH_P_8021Q)) { - qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); - vlan_tci = ntohs(qp->tci); - enc_type = ERSPAN_ENCAP_INFRAME; - } - - skb_push(skb, sizeof(*ershdr)); - ershdr = (struct erspanhdr *)skb->data; - memset(ershdr, 0, sizeof(*ershdr)); - - ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) | - (ERSPAN_VERSION << VER_OFFSET)); - ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) | - ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) | - (enc_type << EN_OFFSET & EN_MASK) | - ((truncate << T_OFFSET) & T_MASK)); - ershdr->md.index = htonl(index & INDEX_MASK); -} - static netdev_tx_t erspan_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -737,7 +692,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, } /* Push ERSPAN header */ - erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate); + erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, + truncate, true); tunnel->parms.o_flags &= ~TUNNEL_KEY; __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); return NETDEV_TX_OK; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bf97317e6c97..180311636023 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3577,6 +3577,9 @@ void __init tcp_init(void) percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); inet_hashinfo_init(&tcp_hashinfo); + inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", + thash_entries, 21, /* one slot per 2 MB*/ + 0, 64 * 1024); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 36f857c87fe2..e9c0d1e1772e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -357,18 +357,12 @@ fail: } EXPORT_SYMBOL(udp_lib_get_port); -static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, - unsigned int port) -{ - return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; -} - int 
udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); + ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); unsigned int hash2_partial = - udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); + ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -485,7 +479,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, u32 hash = 0; if (hslot->count > 10) { - hash2 = udp4_portaddr_hash(net, daddr, hnum); + hash2 = ipv4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) @@ -496,7 +490,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; - hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; /* avoid searching the same slot again. 
*/ if (unlikely(slot2 == old_slot2)) @@ -1761,7 +1755,7 @@ EXPORT_SYMBOL(udp_lib_rehash); static void udp_v4_rehash(struct sock *sk) { - u16 new_hash = udp4_portaddr_hash(sock_net(sk), + u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@ -1952,9 +1946,9 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct sk_buff *nskb; if (use_hash2) { - hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & + hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & udptable->mask; - hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; + hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); @@ -2186,7 +2180,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, int dif, int sdif) { unsigned short hnum = ntohs(loc_port); - unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); + unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f49bd7897e95..a5ad8425551a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6595,27 +6595,43 @@ int __init addrconf_init(void) rtnl_af_register(&inet6_ops); - err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, - 0); + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK, + NULL, inet6_dump_ifinfo, 0); if (err < 0) goto errout; - /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); - __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); - __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, - inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED); - 
__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, - inet6_dump_ifmcaddr, 0); - __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, - inet6_dump_ifacaddr, 0); - __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, - inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED); - + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR, + inet6_rtm_newaddr, NULL, 0); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR, + inet6_rtm_deladdr, NULL, 0); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR, + inet6_rtm_getaddr, inet6_dump_ifaddr, + RTNL_FLAG_DOIT_UNLOCKED); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST, + NULL, inet6_dump_ifmcaddr, 0); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST, + NULL, inet6_dump_ifacaddr, 0); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, + inet6_netconf_get_devconf, + inet6_netconf_dump_devconf, + RTNL_FLAG_DOIT_UNLOCKED); + if (err < 0) + goto errout; ipv6_addr_label_rtnl_register(); return 0; errout: + rtnl_unregister_all(PF_INET6); rtnl_af_unregister(&inet6_ops); unregister_netdevice_notifier(&ipv6_dev_notf); errlo: diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 00e1f8ee08f8..303fcce5beef 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -549,11 +549,10 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, void __init ipv6_addr_label_rtnl_register(void) { - __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, - NULL, RTNL_FLAG_DOIT_UNLOCKED); - __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, - NULL, RTNL_FLAG_DOIT_UNLOCKED); - __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, - ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, + NULL, 
RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, + NULL, RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, + ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED); } - diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 0d1451381f5c..2febe26de6a1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -125,6 +125,40 @@ static inline int compute_score(struct sock *sk, struct net *net, } /* called with rcu_read_lock() */ +static struct sock *inet6_lhash2_lookup(struct net *net, + struct inet_listen_hashbucket *ilb2, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, + const unsigned short hnum, const int dif, const int sdif) +{ + bool exact_dif = inet6_exact_dif_match(net, skb); + struct inet_connection_sock *icsk; + struct sock *sk, *result = NULL; + int score, hiscore = 0; + u32 phash = 0; + + inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { + sk = (struct sock *)icsk; + score = compute_score(sk, net, hnum, daddr, dif, sdif, + exact_dif); + if (score > hiscore) { + if (sk->sk_reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; + } + result = sk; + hiscore = score; + } + } + + return result; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -135,10 +169,42 @@ struct sock *inet6_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; bool exact_dif = inet6_exact_dif_match(net, skb); + struct inet_listen_hashbucket *ilb2; struct sock *sk, *result = NULL; int score, hiscore = 0; + unsigned int hash2; u32 phash = 0; + if (ilb->count <= 10 || !hashinfo->lhash2) + goto port_lookup; + + /* Too many sk in the ilb bucket (which is 
hashed by port alone). + * Try lhash2 (which is hashed by port and addr) instead. + */ + + hash2 = ipv6_portaddr_hash(net, daddr, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + result = inet6_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + if (result) + return result; + + /* Lookup lhash2 with in6addr_any */ + + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + return inet6_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + +port_lookup: sk_for_each(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c43cbaedfa35..a64d559fa513 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2142,8 +2142,8 @@ int __init fib6_init(void) if (ret) goto out_kmem_cache_create; - ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - 0); + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, + inet6_dump_fib, 0); if (ret) goto out_unregister_subsys; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4cfd8e0696fe..1510ce9a4e4e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -55,6 +55,8 @@ #include <net/ip6_route.h> #include <net/ip6_tunnel.h> #include <net/gre.h> +#include <net/erspan.h> +#include <net/dst_metadata.h> static bool log_ecn_error = true; @@ -68,11 +70,13 @@ static unsigned int ip6gre_net_id __read_mostly; struct ip6gre_net { struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; + struct ip6_tnl __rcu *collect_md_tun; struct net_device *fb_tunnel_dev; }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; +static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly; static int ip6gre_tunnel_init(struct 
net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); @@ -121,7 +125,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, unsigned int h1 = HASH_KEY(key); struct ip6_tnl *t, *cand = NULL; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); - int dev_type = (gre_proto == htons(ETH_P_TEB)) ? + int dev_type = (gre_proto == htons(ETH_P_TEB) || + gre_proto == htons(ETH_P_ERSPAN)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; @@ -226,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, if (cand) return cand; + t = rcu_dereference(ign->collect_md_tun); + if (t && t->dev->flags & IFF_UP) + return t; + dev = ign->fb_tunnel_dev; if (dev->flags & IFF_UP) return netdev_priv(dev); @@ -261,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, t); + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } @@ -270,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, NULL); + for (tp = ip6gre_bucket(ign, t); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { @@ -460,6 +475,56 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) &ipv6h->saddr, &ipv6h->daddr, tpi->key, tpi->proto); if (tunnel) { + if (tunnel->parms.collect_md) { + struct metadata_dst *tun_dst; + __be64 tun_id; + __be16 flags; + + flags = tpi->flags; + tun_id = key32_to_tunnel_id(tpi->key); + + tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0); + if (!tun_dst) + return PACKET_REJECT; + + ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); + } else { + ip6_tnl_rcv(tunnel, skb, tpi, NULL, 
log_ecn_error); + } + + return PACKET_RCVD; + } + + return PACKET_REJECT; +} + +static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, + struct tnl_ptk_info *tpi) +{ + const struct ipv6hdr *ipv6h; + struct erspanhdr *ershdr; + struct ip6_tnl *tunnel; + __be32 index; + + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspanhdr *)skb->data; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + + tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); + index = ershdr->md.index; + + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, tpi->key, + tpi->proto); + if (tunnel) { + if (__iptunnel_pull_header(skb, sizeof(*ershdr), + htons(ETH_P_TEB), + false, false) < 0) + return PACKET_REJECT; + + tunnel->parms.index = ntohl(index); ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); return PACKET_RCVD; @@ -481,6 +546,12 @@ static int gre_rcv(struct sk_buff *skb) if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) goto drop; + if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) { + if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + return 0; + goto drop; + } + if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) return 0; @@ -496,6 +567,78 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum) csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } +static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb, + struct net_device *dev, + struct flowi6 *fl6, __u8 *dsfield, + int *encap_limit) +{ + const struct iphdr *iph = ip_hdr(skb); + struct ip6_tnl *t = netdev_priv(dev); + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + *encap_limit = t->parms.encap_limit; + + memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + *dsfield = ipv4_get_dsfield(iph); + else + *dsfield = ip6_tclass(t->parms.flowinfo); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6->flowi6_mark = skb->mark; + else + fl6->flowi6_mark = t->parms.fwmark; + + fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); +} + +static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, + struct net_device *dev, + struct flowi6 *fl6, __u8 *dsfield, + int *encap_limit) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ip6_tnl *t = netdev_priv(dev); + __u16 offset; + + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + + tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + *encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + *encap_limit = t->parms.encap_limit; + } + + memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + *dsfield = ipv6_get_dsfield(ipv6h); + else + *dsfield = ip6_tclass(t->parms.flowinfo); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6->flowlabel |= ip6_flowlabel(ipv6h); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6->flowi6_mark = skb->mark; + else + fl6->flowi6_mark = t->parms.fwmark; + + fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + return 0; +} + static 
netdev_tx_t __gre6_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct flowi6 *fl6, int encap_limit, @@ -517,8 +660,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; - gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, - protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + + if (tunnel->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + __be16 flags; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || + !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -EINVAL; + + key = &tun_info->key; + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = IPPROTO_GRE; + fl6->daddr = key->u.ipv6.dst; + fl6->flowlabel = key->label; + fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + dsfield = key->tos; + flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); + tunnel->tun_hlen = gre_calc_hlen(flags); + + gre_build_header(skb, tunnel->tun_hlen, + flags, protocol, + tunnel_id_to_key32(tun_info->key.tun_id), 0); + + } else { + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + protocol, tunnel->parms.o_key, + htonl(tunnel->o_seqno)); + } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); @@ -527,30 +700,17 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - const struct iphdr *iph = ip_hdr(skb); int encap_limit = -1; struct flowi6 fl6; - __u8 dsfield; + __u8 dsfield = 0; __u32 mtu; int err; memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv4_get_dsfield(iph); - else - dsfield = 
ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; - else - fl6.flowi6_mark = t->parms.fwmark; - - fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (!t->parms.collect_md) + prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, + &dsfield, &encap_limit); err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) @@ -574,46 +734,17 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct ipv6hdr *ipv6h = ipv6_hdr(skb); int encap_limit = -1; - __u16 offset; struct flowi6 fl6; - __u8 dsfield; + __u8 dsfield = 0; __u32 mtu; int err; if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) return -1; - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ - ipv6h = ipv6_hdr(skb); - - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); - return -1; - } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv6_get_dsfield(ipv6h); - else - dsfield = ip6_tclass(t->parms.flowinfo); - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; - else - fl6.flowi6_mark = t->parms.fwmark; - - fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (!t->parms.collect_md && + prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) + return -1; if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; @@ -660,7 +791,8 @@ static int ip6gre_xmit_other(struct 
sk_buff *skb, struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + if (!t->parms.collect_md) + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) @@ -705,6 +837,88 @@ tx_err: return NETDEV_TX_OK; } +static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ip6_tnl *t = netdev_priv(dev); + struct dst_entry *dst = skb_dst(skb); + struct net_device_stats *stats; + bool truncate = false; + int encap_limit = -1; + __u8 dsfield = false; + struct flowi6 fl6; + int err = -EINVAL; + __u32 mtu; + + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) + goto tx_err; + + if (gre_handle_offloads(skb, false)) + goto tx_err; + + switch (skb->protocol) { + case htons(ETH_P_IP): + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, + &dsfield, &encap_limit); + break; + case htons(ETH_P_IPV6): + if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + goto tx_err; + if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, + &dsfield, &encap_limit)) + goto tx_err; + break; + default: + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + break; + } + + if (skb->len > dev->mtu + dev->hard_header_len) { + pskb_trim(skb, dev->mtu + dev->hard_header_len); + truncate = true; + } + + erspan_build_header(skb, t->parms.o_key, t->parms.index, + truncate, false); + t->parms.o_flags &= ~TUNNEL_KEY; + + IPCB(skb)->flags = 0; + fl6.daddr = t->parms.raddr; + + /* Push GRE header. 
*/ + gre_build_header(skb, 8, TUNNEL_SEQ, + htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++)); + + /* TooBig packet may have updated dst->dev's mtu */ + if (dst && dst_mtu(dst) > dst->dev->mtu) + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + NEXTHDR_GRE); + if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ + if (err == -EMSGSIZE) { + if (skb->protocol == htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } + + goto tx_err; + } + return NETDEV_TX_OK; + +tx_err: + stats = &t->dev->stats; + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) { struct net_device *dev = t->dev; @@ -1048,6 +1262,11 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + if (tunnel->parms.collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } + return 0; } @@ -1062,6 +1281,9 @@ static int ip6gre_tunnel_init(struct net_device *dev) tunnel = netdev_priv(dev); + if (tunnel->parms.collect_md) + return 0; + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); @@ -1084,7 +1306,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) dev_hold(dev); } - static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, @@ -1099,7 +1320,8 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &ip6gre_link_ops || - dev->rtnl_link_ops == &ip6gre_tap_ops) + dev->rtnl_link_ops == &ip6gre_tap_ops || + dev->rtnl_link_ops == &ip6erspan_tap_ops) unregister_netdevice_queue(dev, head); for (prio = 0; 
prio < 4; prio++) { @@ -1221,6 +1443,47 @@ out: return ip6gre_tunnel_validate(tb, data, extack); } +static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + __be16 flags = 0; + int ret; + + if (!data) + return 0; + + ret = ip6gre_tap_validate(tb, data, extack); + if (ret) + return ret; + + /* ERSPAN should only have GRE sequence and key flag */ + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (!data[IFLA_GRE_COLLECT_METADATA] && + flags != (GRE_SEQ | GRE_KEY)) + return -EINVAL; + + /* ERSPAN Session ID only has 10-bit. Since we reuse + * 32-bit key field as ID, check it's range. + */ + if (data[IFLA_GRE_IKEY] && + (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) + return -EINVAL; + + if (data[IFLA_GRE_OKEY] && + (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) + return -EINVAL; + + if (data[IFLA_GRE_ERSPAN_INDEX]) { + u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + + if (index & ~INDEX_MASK) + return -EINVAL; + } + return 0; +} static void ip6gre_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) @@ -1267,6 +1530,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[], if (data[IFLA_GRE_FWMARK]) parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); + + if (data[IFLA_GRE_ERSPAN_INDEX]) + parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + + if (data[IFLA_GRE_COLLECT_METADATA]) + parms->collect_md = true; } static int ip6gre_tap_init(struct net_device *dev) @@ -1303,6 +1572,59 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { NETIF_F_HIGHDMA | \ NETIF_F_HW_CSUM) +static int ip6erspan_tap_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + int t_hlen; + int ret; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + tunnel->net = dev_net(dev); + strcpy(tunnel->parms.name, dev->name); + + dev->tstats = netdev_alloc_pcpu_stats(struct 
pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + + tunnel->tun_hlen = 8; + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + + sizeof(struct erspanhdr); + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; + if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); + ip6gre_tnl_link_config(tunnel, 1); + + return 0; +} + +static const struct net_device_ops ip6erspan_netdev_ops = { + .ndo_init = ip6erspan_tap_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6erspan_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip6_tnl_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip6_tnl_get_iflink, +}; + static void ip6gre_tap_setup(struct net_device *dev) { @@ -1372,8 +1694,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, ip6gre_netlink_parms(data, &nt->parms); - if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; + if (nt->parms.collect_md) { + if (rtnl_dereference(ign->collect_md_tun)) + return -EEXIST; + } else { + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + } if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); @@ -1492,8 +1819,12 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_GRE_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_GRE_COLLECT_METADATA */ + nla_total_size(0) + /* IFLA_GRE_FWMARK */ nla_total_size(4) + + /* IFLA_GRE_ERSPAN_INDEX */ + nla_total_size(4) + 0; } @@ -1515,7 +1846,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct 
net_device *dev) nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || - nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark)) + nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) || + nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, @@ -1528,6 +1860,11 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) t->encap.flags)) goto nla_put_failure; + if (p->collect_md) { + if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) + goto nla_put_failure; + } + return 0; nla_put_failure: @@ -1550,9 +1887,25 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, + [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, }; +static void ip6erspan_tap_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &ip6erspan_netdev_ops; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; + + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); +} + static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .kind = "ip6gre", .maxtype = IFLA_GRE_MAX, @@ -1582,6 +1935,20 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { .get_link_net = ip6_tnl_get_link_net, }; +static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = { + .kind = "ip6erspan", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6erspan_tap_setup, + .validate = ip6erspan_tap_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, 
+ .get_link_net = ip6_tnl_get_link_net, +}; + /* * And now the modules code and kernel interface. */ @@ -1610,9 +1977,15 @@ static int __init ip6gre_init(void) if (err < 0) goto tap_ops_failed; + err = rtnl_link_register(&ip6erspan_tap_ops); + if (err < 0) + goto erspan_link_failed; + out: return err; +erspan_link_failed: + rtnl_link_unregister(&ip6gre_tap_ops); tap_ops_failed: rtnl_link_unregister(&ip6gre_link_ops); rtnl_link_failed: @@ -1626,6 +1999,7 @@ static void __exit ip6gre_fini(void) { rtnl_link_unregister(&ip6gre_tap_ops); rtnl_link_unregister(&ip6gre_link_ops); + rtnl_link_unregister(&ip6erspan_tap_ops); inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); unregister_pernet_device(&ip6gre_net_ops); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3d3092adf1d2..6a3b1a54a952 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -861,7 +861,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, struct metadata_dst *tun_dst, bool log_ecn_err) { - return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate, + return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate, log_ecn_err); } EXPORT_SYMBOL(ip6_tnl_rcv); @@ -979,6 +979,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, int ret = 0; struct net *net = t->net; + if (t->parms.collect_md) + return 1; + if ((p->flags & IP6_TNL_F_CAP_XMIT) || ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 46fd53b268da..b3f4d19b3ca5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4772,11 +4772,20 @@ int __init ip6_route_init(void) if (ret) goto fib6_rules_init; - ret = -ENOBUFS; - if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) || - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) || - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, - RTNL_FLAG_DOIT_UNLOCKED)) + ret = rtnl_register_module(THIS_MODULE, 
PF_INET6, RTM_NEWROUTE, + inet6_rtm_newroute, NULL, 0); + if (ret < 0) + goto out_register_late_subsys; + + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, + inet6_rtm_delroute, NULL, 0); + if (ret < 0) + goto out_register_late_subsys; + + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, + inet6_rtm_getroute, NULL, + RTNL_FLAG_DOIT_UNLOCKED); + if (ret < 0) goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); @@ -4794,6 +4803,7 @@ out: return ret; out_register_late_subsys: + rtnl_unregister_all(PF_INET6); unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_init: fib6_rules_cleanup(); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c9f91c28b81d..eecf9f0faf29 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -89,28 +89,12 @@ static u32 udp6_ehashfn(const struct net *net, udp_ipv6_hash_secret + net_hash_mix(net)); } -static u32 udp6_portaddr_hash(const struct net *net, - const struct in6_addr *addr6, - unsigned int port) -{ - unsigned int hash, mix = net_hash_mix(net); - - if (ipv6_addr_any(addr6)) - hash = jhash_1word(0, mix); - else if (ipv6_addr_v4mapped(addr6)) - hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); - else - hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); - - return hash ^ port; -} - int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); + ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum); unsigned int hash2_partial = - udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); + ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -119,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) static void udp_v6_rehash(struct sock *sk) { - u16 new_hash = udp6_portaddr_hash(sock_net(sk), + u16 new_hash = ipv6_portaddr_hash(sock_net(sk), 
&sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); @@ -225,7 +209,7 @@ struct sock *__udp6_lib_lookup(struct net *net, u32 hash = 0; if (hslot->count > 10) { - hash2 = udp6_portaddr_hash(net, daddr, hnum); + hash2 = ipv6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) @@ -236,7 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; - hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; /* avoid searching the same slot again. */ if (unlikely(slot2 == old_slot2)) @@ -705,9 +689,9 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct sk_buff *nskb; if (use_hash2) { - hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & + hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) & udptable->mask; - hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask; + hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); @@ -895,7 +879,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, int dif, int sdif) { unsigned short hnum = ntohs(loc_port); - unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum); + unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 8ca9915befc8..5dce8336d33f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2510,12 +2510,15 @@ static int __init mpls_init(void) rtnl_af_register(&mpls_af_ops); - rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0); - rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 
0); - rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, - 0); - rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, - mpls_netconf_dump_devconf, 0); + rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_NEWROUTE, + mpls_rtm_newroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_DELROUTE, + mpls_rtm_delroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETROUTE, + mpls_getroute, mpls_dump_routes, 0); + rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF, + mpls_netconf_get_devconf, + mpls_netconf_dump_devconf, 0); err = ipgre_tunnel_encap_add_mpls_ops(); if (err) pr_err("Can't add mpls over gre tunnel ops\n"); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 04a3128adcf0..3e7747549f90 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -126,18 +126,12 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) } } -static void internal_set_rx_headroom(struct net_device *dev, int new_hr) -{ - dev->needed_headroom = new_hr < 0 ? 
0 : new_hr; -} - static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = internal_get_stats, - .ndo_set_rx_headroom = internal_set_rx_headroom, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { @@ -154,7 +148,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | - IFF_PHONY_HEADROOM | IFF_NO_QUEUE; + IFF_NO_QUEUE; netdev->needs_free_netdev = true; netdev->priv_destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; @@ -195,7 +189,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = -ENOMEM; goto error_free_netdev; } - vport->dev->needed_headroom = vport->dp->max_headroom; dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(vport->dev); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index da754fc926e7..871eaf2cb85e 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -299,16 +299,21 @@ out: int __init phonet_netlink_register(void) { - int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, - NULL, 0); + int err = rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWADDR, + addr_doit, NULL, 0); if (err) return err; - /* Further __rtnl_register() cannot fail */ - __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0); - __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0); - __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0); - __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0); - __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0); + /* Further rtnl_register_module() cannot fail */ + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELADDR, + addr_doit, NULL, 0); + rtnl_register_module(THIS_MODULE, 
PF_PHONET, RTM_GETADDR, + NULL, getaddr_dumpit, 0); + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWROUTE, + route_doit, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE, + route_doit, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE, + NULL, route_dumpit, 0); return 0; } diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 77ab05e23001..5fb3929e3d7d 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1116,9 +1116,13 @@ static int __init qrtr_proto_init(void) return rc; } - rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0); + rc = rtnl_register_module(THIS_MODULE, PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0); + if (rc) { + sock_unregister(qrtr_family.family); + proto_unregister(&qrtr_proto); + } - return 0; + return rc; } postcore_initcall(qrtr_proto_init); diff --git a/net/rds/connection.c b/net/rds/connection.c index 7ee2d5d68b78..9efc82c665b5 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -366,6 +366,8 @@ void rds_conn_shutdown(struct rds_conn_path *cp) * to the conn hash, so we never trigger a reconnect on this * conn - the reconnect is always triggered by the active peer. 
*/ cancel_delayed_work_sync(&cp->cp_conn_w); + if (conn->c_destroy_in_prog) + return; rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node)) { rcu_read_unlock(); @@ -445,7 +447,6 @@ void rds_conn_destroy(struct rds_connection *conn) */ rds_cong_remove_conn(conn); - put_net(conn->c_net); kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); diff --git a/net/rds/rds.h b/net/rds/rds.h index c349c71babff..d09f6c1facb4 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -150,7 +150,7 @@ struct rds_connection { /* Protocol version */ unsigned int c_version; - struct net *c_net; + possible_net_t c_net; struct list_head c_map_item; unsigned long c_map_queued; @@ -165,13 +165,13 @@ struct rds_connection { static inline struct net *rds_conn_net(struct rds_connection *conn) { - return conn->c_net; + return read_pnet(&conn->c_net); } static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) { - conn->c_net = get_net(net); + write_pnet(&conn->c_net, net); } #define RDS_FLAG_CONG_BITMAP 0x01 diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 6b7ee71f40c6..39f502d47969 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -306,7 +306,8 @@ static void rds_tcp_conn_free(void *arg) rdsdebug("freeing tc %p\n", tc); spin_lock_irqsave(&rds_tcp_conn_lock, flags); - list_del(&tc->t_tcp_node); + if (!tc->t_tcp_node_detached) + list_del(&tc->t_tcp_node); spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); kmem_cache_free(rds_tcp_conn_slab, tc); @@ -495,27 +496,6 @@ static struct pernet_operations rds_tcp_net_ops = { .size = sizeof(struct rds_tcp_net), }; -/* explicitly send a RST on each socket, thereby releasing any socket refcnts - * that may otherwise hold up netns deletion. 
- */ -static void rds_tcp_conn_paths_destroy(struct rds_connection *conn) -{ - struct rds_conn_path *cp; - struct rds_tcp_connection *tc; - int i; - struct sock *sk; - - for (i = 0; i < RDS_MPATH_WORKERS; i++) { - cp = &conn->c_path[i]; - tc = cp->cp_transport_data; - if (!tc->t_sock) - continue; - sk = tc->t_sock->sk; - sk->sk_prot->disconnect(sk, 0); - tcp_done(sk); - } -} - static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; @@ -527,18 +507,20 @@ static void rds_tcp_kill_sock(struct net *net) rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = tc->t_cpath->cp_conn->c_net; + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); + } else { + list_del(&tc->t_tcp_node); + tc->t_tcp_node_detached = true; + } } spin_unlock_irq(&rds_tcp_conn_lock); - list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn); + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->t_cpath->cp_conn); - } } void *rds_tcp_listen_sock_def_readable(struct net *net) @@ -586,7 +568,7 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = tc->t_cpath->cp_conn->c_net; + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 1aafbf7c3011..e7858ee8ed8b 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -12,6 +12,7 @@ struct rds_tcp_incoming { struct rds_tcp_connection { struct list_head t_tcp_node; + bool t_tcp_node_detached; struct rds_conn_path 
*t_cpath; /* t_conn_path_lock synchronizes the connection establishment between * rds_tcp_accept_one and rds_tcp_conn_path_connect diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 329325bd553e..37892b3909af 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, 2014-2016, Ericsson AB + * Copyright (c) 2004-2006, 2014-2017, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -42,8 +42,8 @@ #include "link.h" #include "name_table.h" -#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ -#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ +#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ +#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; @@ -74,6 +74,10 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net) return tipc_net(net)->bcbase; } +/* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link + * Note: the MTU is decremented to give room for a tunnel header, in + * case the message needs to be sent as replicast + */ int tipc_bcast_get_mtu(struct net *net) { return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE; @@ -515,7 +519,7 @@ int tipc_bcast_init(struct net *net) spin_lock_init(&tipc_net(net)->bclock); if (!tipc_link_bc_create(net, 0, 0, - U16_MAX, + FB_MTU, BCLINK_WIN_DEFAULT, 0, &bb->inputq, diff --git a/net/tipc/link.c b/net/tipc/link.c index 6bce0b1117bd..2d6b2aed30e0 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -483,7 +483,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, /** * tipc_link_bc_create - create new link to be used for broadcast * @n: pointer to associated node - * @mtu: mtu to be used + * @mtu: mtu to be used initially if no peers * @window: send window to be used * @inputq: queue to put messages ready for 
delivery * @namedq: queue to put binding table update messages ready for delivery diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b0d07b35909d..55d8ba92291d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -251,20 +251,23 @@ bool tipc_msg_validate(struct sk_buff **_skb) * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller * + * Note that the recursive call we are making here is safe, since it can + * logically go only one further level down. + * * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, - int offset, int dsz, int pktmax, struct sk_buff_head *list) +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, + int dsz, int pktmax, struct sk_buff_head *list) { int mhsz = msg_hdr_sz(mhdr); + struct tipc_msg pkthdr; int msz = mhsz + dsz; - int pktno = 1; - int pktsz; int pktrem = pktmax; - int drem = dsz; - struct tipc_msg pkthdr; struct sk_buff *skb; + int drem = dsz; + int pktno = 1; char *pktpos; + int pktsz; int rc; msg_set_size(mhdr, msz); @@ -272,8 +275,18 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, /* No fragmentation needed? 
*/ if (likely(msz <= pktmax)) { skb = tipc_buf_acquire(msz, GFP_KERNEL); - if (unlikely(!skb)) + + /* Fall back to smaller MTU if node local message */ + if (unlikely(!skb)) { + if (pktmax != MAX_MSG_SIZE) + return -ENOMEM; + rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list); + if (rc != dsz) + return rc; + if (tipc_msg_assemble(list)) + return dsz; return -ENOMEM; + } skb_orphan(skb); __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); @@ -589,6 +602,30 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) return true; } +/* tipc_msg_assemble() - assemble chain of fragments into one message + */ +bool tipc_msg_assemble(struct sk_buff_head *list) +{ + struct sk_buff *skb, *tmp = NULL; + + if (skb_queue_len(list) == 1) + return true; + + while ((skb = __skb_dequeue(list))) { + skb->next = NULL; + if (tipc_buf_append(&tmp, &skb)) { + __skb_queue_tail(list, skb); + return true; + } + if (!tmp) + break; + } + __skb_queue_purge(list); + __skb_queue_head_init(list); + pr_warn("Failed do assemble buffer\n"); + return false; +} + /* tipc_msg_reassemble() - clone a buffer chain of fragments and * reassemble the clones into one message */ diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 3e4384c222f7..b4ba1b4f9ae7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -98,7 +98,7 @@ struct plist; #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) - +#define FB_MTU 3744 #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { @@ -943,6 +943,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); +bool tipc_msg_assemble(struct sk_buff_head *list); bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); bool 
tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, struct sk_buff_head *cpy); diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 370b749f5ee6..15a469220e19 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -81,6 +81,49 @@ int _gre_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("ip6gretap_set_tunnel") +int _ip6gretap_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = _htonl(0x11); /* ::11 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + key.tunnel_label = 0xabcde; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6gretap_get_tunnel") +int _ip6gretap_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip6 ::%x label %x\n"; + struct bpf_tunnel_key key; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + + return TC_ACT_OK; +} + SEC("erspan_set_tunnel") int _erspan_set_tunnel(struct __sk_buff *skb) { diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh index 312e1722a39f..226f45381b76 100755 --- a/samples/bpf/test_tunnel_bpf.sh +++ b/samples/bpf/test_tunnel_bpf.sh @@ -33,6 +33,30 @@ function add_gre_tunnel { ip addr add dev $DEV 10.1.1.200/24 } +function add_ip6gretap_tunnel { + + # assign ipv6 address + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \ + local ::11 remote ::22 + + ip netns exec at_ns0 ip addr add 
dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # out of namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip addr add dev $DEV fc80::200/24 + ip link set dev $DEV up +} + function add_erspan_tunnel { # in namespace ip netns exec at_ns0 \ @@ -113,6 +137,41 @@ function test_gre { cleanup } +function test_ip6gre { + TYPE=ip6gre + DEV_NS=ip6gre00 + DEV=ip6gre11 + config_device + # reuse the ip6gretap function + add_ip6gretap_tunnel + attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel + # underlay + ping6 -c 4 ::11 + # overlay: ipv4 over ipv6 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ping -c 1 10.1.1.100 + # overlay: ipv6 over ipv6 + ip netns exec at_ns0 ping6 -c 1 fc80::200 + cleanup +} + +function test_ip6gretap { + TYPE=ip6gretap + DEV_NS=ip6gretap00 + DEV=ip6gretap11 + config_device + add_ip6gretap_tunnel + attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel + # underlay + ping6 -c 4 ::11 + # overlay: ipv4 over ipv6 + ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200 + ping -c 1 10.1.1.100 + # overlay: ipv6 over ipv6 + ip netns exec at_ns0 ping6 -c 1 fc80::200 + cleanup +} + function test_erspan { TYPE=erspan DEV_NS=erspan00 @@ -175,6 +234,8 @@ function cleanup { ip link del veth1 ip link del ipip11 ip link del gretap11 + ip link del ip6gre11 + ip link del ip6gretap11 ip link del vxlan11 ip link del geneve11 ip link del erspan11 @@ -187,6 +248,10 @@ trap cleanup 0 2 3 6 9 cleanup echo "Testing GRE tunnel..." test_gre +echo "Testing IP6GRE tunnel..." +test_ip6gre +echo "Testing IP6GRETAP tunnel..." +test_ip6gretap echo "Testing ERSPAN tunnel..." test_erspan echo "Testing VXLAN tunnel..." |