diff options
78 files changed, 1000 insertions, 539 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 579a8fd74be0..be2e7a2b10d7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -269,7 +269,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ offsetof(struct bpf_array, map.max_entries)); EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 44 /* number of bytes to jump */ +#define OFFSET1 47 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -278,15 +278,15 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 33 +#define OFFSET2 36 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ /* prog = array->prog[index]; */ - EMIT4(0x48, 0x8D, 0x44, 0xD6); /* lea rax, [rsi + rdx * 8 + 0x50] */ - EMIT1(offsetof(struct bpf_array, prog)); + EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + offsetof(struct bpf_array, prog)); EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index caeb39561567..bf9eb2ecf960 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -104,6 +104,57 @@ static void *macb_rx_buffer(struct macb *bp, unsigned int index) return bp->rx_buffers + bp->rx_buffer_size * macb_rx_ring_wrap(index); } +/* I/O accessors */ +static u32 hw_readl_native(struct macb *bp, int offset) +{ + return __raw_readl(bp->regs + offset); +} + +static void hw_writel_native(struct macb *bp, int offset, u32 value) +{ + __raw_writel(value, bp->regs + offset); +} + +static u32 hw_readl(struct macb *bp, int offset) +{ + return readl_relaxed(bp->regs + offset); +} + +static void hw_writel(struct macb *bp, int offset, u32 value) +{ + writel_relaxed(value, bp->regs + offset); +} + +/* + * Find the CPU endianness by using the loopback bit of NCR register. When the + * CPU is in big endian we need to program swaped mode for management + * descriptor access. + */ +static bool hw_is_native_io(void __iomem *addr) +{ + u32 value = MACB_BIT(LLB); + + __raw_writel(value, addr + MACB_NCR); + value = __raw_readl(addr + MACB_NCR); + + /* Write 0 back to disable everything */ + __raw_writel(0, addr + MACB_NCR); + + return value == MACB_BIT(LLB); +} + +static bool hw_is_gem(void __iomem *addr, bool native_io) +{ + u32 id; + + if (native_io) + id = __raw_readl(addr + MACB_MID); + else + id = readl_relaxed(addr + MACB_MID); + + return MACB_BFEXT(IDNUM, id) >= 0x2; +} + static void macb_set_hwaddr(struct macb *bp) { u32 bottom; @@ -160,7 +211,7 @@ static void macb_get_hwaddr(struct macb *bp) } } - netdev_info(bp->dev, "invalid hw address, using random\n"); + dev_info(&bp->pdev->dev, "invalid hw address, using random\n"); eth_hw_addr_random(bp->dev); } @@ -252,7 +303,6 @@ static void macb_handle_link_change(struct net_device *dev) struct macb *bp = netdev_priv(dev); struct phy_device *phydev = bp->phy_dev; unsigned long flags; - int status_change = 0; spin_lock_irqsave(&bp->lock, flags); @@ -449,14 +499,14 @@ err_out: static void macb_update_stats(struct macb *bp) { - u32 __iomem *reg = bp->regs + MACB_PFR; u32 *p = &bp->hw_stats.macb.rx_pause_frames; u32 *end = &bp->hw_stats.macb.tx_pause_frames + 1; + int offset = MACB_PFR; WARN_ON((unsigned long)(end - p - 1) != (MACB_TPF - MACB_PFR) / 4); - for(; p < end; p++, reg++) - *p += readl_relaxed(reg); + for(; p < end; p++, offset += 4) + *p += bp->macb_reg_readl(bp, offset); } static int macb_halt_tx(struct macb *bp) @@ -1107,12 +1157,6 @@ static void macb_poll_controller(struct net_device *dev) } #endif -static inline unsigned int macb_count_tx_descriptors(struct macb *bp, - unsigned int len) -{ - return (len + bp->max_tx_length - 1) / bp->max_tx_length; -} - static unsigned int macb_tx_map(struct macb *bp, struct macb_queue *queue, struct sk_buff *skb) @@ -1263,11 +1307,11 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) * socket buffer: skb fragments of jumbo frames may need to be * splitted into many buffer descriptors. */ - count = macb_count_tx_descriptors(bp, skb_headlen(skb)); + count = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length); nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) { frag_size = skb_frag_size(&skb_shinfo(skb)->frags[f]); - count += macb_count_tx_descriptors(bp, frag_size); + count += DIV_ROUND_UP(frag_size, bp->max_tx_length); } spin_lock_irqsave(&bp->lock, flags); @@ -1603,7 +1647,6 @@ static u32 macb_dbw(struct macb *bp) static void macb_configure_dma(struct macb *bp) { u32 dmacfg; - u32 tmp, ncr; if (macb_is_gem(bp)) { dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L); @@ -1613,22 +1656,11 @@ static void macb_configure_dma(struct macb *bp) dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L); dmacfg &= ~GEM_BIT(ENDIA_PKT); - /* Find the CPU endianness by using the loopback bit of net_ctrl - * register. save it first. When the CPU is in big endian we - * need to program swaped mode for management descriptor access. - */ - ncr = macb_readl(bp, NCR); - __raw_writel(MACB_BIT(LLB), bp->regs + MACB_NCR); - tmp = __raw_readl(bp->regs + MACB_NCR); - - if (tmp == MACB_BIT(LLB)) + if (bp->native_io) dmacfg &= ~GEM_BIT(ENDIA_DESC); else dmacfg |= GEM_BIT(ENDIA_DESC); /* CPU in big endian */ - /* Restore net_ctrl */ - macb_writel(bp, NCR, ncr); - if (bp->dev->features & NETIF_F_HW_CSUM) dmacfg |= GEM_BIT(TXCOEN); else @@ -1897,19 +1929,19 @@ static int macb_change_mtu(struct net_device *dev, int new_mtu) static void gem_update_stats(struct macb *bp) { - int i; + unsigned int i; u32 *p = &bp->hw_stats.gem.tx_octets_31_0; for (i = 0; i < GEM_STATS_LEN; ++i, ++p) { u32 offset = gem_statistics[i].offset; - u64 val = readl_relaxed(bp->regs + offset); + u64 val = bp->macb_reg_readl(bp, offset); bp->ethtool_stats[i] += val; *p += val; if (offset == GEM_OCTTXL || offset == GEM_OCTRXL) { /* Add GEM_OCTTXH, GEM_OCTRXH */ - val = readl_relaxed(bp->regs + offset + 4); + val = bp->macb_reg_readl(bp, offset + 4); bp->ethtool_stats[i] += ((u64)val) << 32; *(++p) += val; } @@ -1976,7 +2008,7 @@ static int gem_get_sset_count(struct net_device *dev, int sset) static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p) { - int i; + unsigned int i; switch (sset) { case ETH_SS_STATS: @@ -2190,7 +2222,7 @@ static void macb_configure_caps(struct macb *bp, const struct macb_config *dt_co if (dt_conf) bp->caps = dt_conf->caps; - if (macb_is_gem_hw(bp->regs)) { + if (hw_is_gem(bp->regs, bp->native_io)) { bp->caps |= MACB_CAPS_MACB_IS_GEM; dcfg = gem_readl(bp, DCFG1); @@ -2201,10 +2233,11 @@ static void macb_configure_caps(struct macb *bp, const struct macb_config *dt_co bp->caps |= MACB_CAPS_FIFO_MODE; } - netdev_dbg(bp->dev, "Cadence caps 0x%08x\n", bp->caps); + dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps); } static void macb_probe_queues(void __iomem *mem, + bool native_io, unsigned int *queue_mask, unsigned int *num_queues) { @@ -2219,7 +2252,7 @@ static void macb_probe_queues(void __iomem *mem, * we are early in the probe process and don't have the * MACB_CAPS_MACB_IS_GEM flag positioned */ - if (!macb_is_gem_hw(mem)) + if (!hw_is_gem(mem, native_io)) return; /* bit 0 is never set but queue 0 always exists */ @@ -2786,6 +2819,7 @@ static int macb_probe(struct platform_device *pdev) struct clk *pclk, *hclk, *tx_clk; unsigned int queue_mask, num_queues; struct macb_platform_data *pdata; + bool native_io; struct phy_device *phydev; struct net_device *dev; struct resource *regs; @@ -2794,6 +2828,11 @@ static int macb_probe(struct platform_device *pdev) struct macb *bp; int err; + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mem = devm_ioremap_resource(&pdev->dev, regs); + if (IS_ERR(mem)) + return PTR_ERR(mem); + if (np) { const struct of_device_id *match; @@ -2809,14 +2848,9 @@ static int macb_probe(struct platform_device *pdev) if (err) return err; - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mem = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(mem)) { - err = PTR_ERR(mem); - goto err_disable_clocks; - } + native_io = hw_is_native_io(mem); - macb_probe_queues(mem, &queue_mask, &num_queues); + macb_probe_queues(mem, native_io, &queue_mask, &num_queues); dev = alloc_etherdev_mq(sizeof(*bp), num_queues); if (!dev) { err = -ENOMEM; @@ -2831,6 +2865,14 @@ static int macb_probe(struct platform_device *pdev) bp->pdev = pdev; bp->dev = dev; bp->regs = mem; + bp->native_io = native_io; + if (native_io) { + bp->macb_reg_readl = hw_readl_native; + bp->macb_reg_writel = hw_writel_native; + } else { + bp->macb_reg_readl = hw_readl; + bp->macb_reg_writel = hw_writel; + } bp->num_queues = num_queues; bp->queue_mask = queue_mask; if (macb_config) @@ -2838,9 +2880,8 @@ static int macb_probe(struct platform_device *pdev) bp->pclk = pclk; bp->hclk = hclk; bp->tx_clk = tx_clk; - if (macb_config->jumbo_max_len) { + if (macb_config) bp->jumbo_max_len = macb_config->jumbo_max_len; - } spin_lock_init(&bp->lock); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index d74655993d4b..1895b6b2addd 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -429,18 +429,12 @@ | GEM_BF(name, value)) /* Register access macros */ -#define macb_readl(port,reg) \ - readl_relaxed((port)->regs + MACB_##reg) -#define macb_writel(port,reg,value) \ - writel_relaxed((value), (port)->regs + MACB_##reg) -#define gem_readl(port, reg) \ - readl_relaxed((port)->regs + GEM_##reg) -#define gem_writel(port, reg, value) \ - writel_relaxed((value), (port)->regs + GEM_##reg) -#define queue_readl(queue, reg) \ - readl_relaxed((queue)->bp->regs + (queue)->reg) -#define queue_writel(queue, reg, value) \ - writel_relaxed((value), (queue)->bp->regs + (queue)->reg) +#define macb_readl(port, reg) (port)->macb_reg_readl((port), MACB_##reg) +#define macb_writel(port, reg, value) (port)->macb_reg_writel((port), MACB_##reg, (value)) +#define gem_readl(port, reg) (port)->macb_reg_readl((port), GEM_##reg) +#define gem_writel(port, reg, value) (port)->macb_reg_writel((port), GEM_##reg, (value)) +#define queue_readl(queue, reg) (queue)->bp->macb_reg_readl((queue)->bp, (queue)->reg) +#define queue_writel(queue, reg, value) (queue)->bp->macb_reg_writel((queue)->bp, (queue)->reg, (value)) /* Conditional GEM/MACB macros. These perform the operation to the correct * register dependent on whether the device is a GEM or a MACB. For registers @@ -785,6 +779,11 @@ struct macb_queue { struct macb { void __iomem *regs; + bool native_io; + + /* hardware IO accessors */ + u32 (*macb_reg_readl)(struct macb *bp, int offset); + void (*macb_reg_writel)(struct macb *bp, int offset, u32 value); unsigned int rx_tail; unsigned int rx_prepared_head; @@ -817,9 +816,9 @@ struct macb { struct mii_bus *mii_bus; struct phy_device *phy_dev; - unsigned int link; - unsigned int speed; - unsigned int duplex; + int link; + int speed; + int duplex; u32 caps; unsigned int dma_burst_length; @@ -843,9 +842,4 @@ static inline bool macb_is_gem(struct macb *bp) return !!(bp->caps & MACB_CAPS_MACB_IS_GEM); } -static inline bool macb_is_gem_hw(void __iomem *addr) -{ - return !!(MACB_BFEXT(IDNUM, readl_relaxed(addr + MACB_MID)) >= 0x2); -} - #endif /* _MACB_H */ diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index dda8a02b7322..8aee250904ec 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -125,6 +125,15 @@ */ #define NICPF_CLK_PER_INT_TICK 2 +/* Time to wait before we decide that a SQ is stuck. + * + * Since both pkt rx and tx notifications are done with same CQ, + * when packets are being received at very high rate (eg: L2 forwarding) + * then freeing transmitted skbs will be delayed and watchdog + * will kick in, resetting interface. Hence keeping this value high. + */ +#define NICVF_TX_TIMEOUT (50 * HZ) + struct nicvf_cq_poll { u8 cq_idx; /* Completion queue index */ struct napi_struct napi; @@ -216,8 +225,9 @@ struct nicvf_drv_stats { /* Tx */ u64 tx_frames_ok; u64 tx_drops; - u64 tx_busy; u64 tx_tso; + u64 txq_stop; + u64 txq_wake; }; struct nicvf { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index 16bd2d772db9..a4228e664567 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -66,9 +66,10 @@ static const struct nicvf_stat nicvf_drv_stats[] = { NICVF_DRV_STAT(rx_frames_jumbo), NICVF_DRV_STAT(rx_drops), NICVF_DRV_STAT(tx_frames_ok), - NICVF_DRV_STAT(tx_busy), NICVF_DRV_STAT(tx_tso), NICVF_DRV_STAT(tx_drops), + NICVF_DRV_STAT(txq_stop), + NICVF_DRV_STAT(txq_wake), }; static const struct nicvf_stat nicvf_queue_stats[] = { @@ -126,6 +127,7 @@ static void nicvf_set_msglevel(struct net_device *netdev, u32 lvl) static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) { + struct nicvf *nic = netdev_priv(netdev); int stats, qidx; if (sset != ETH_SS_STATS) @@ -141,7 +143,7 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) data += ETH_GSTRING_LEN; } - for (qidx = 0; qidx < MAX_RCV_QUEUES_PER_QS; qidx++) { + for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) { for (stats = 0; stats < nicvf_n_queue_stats; stats++) { sprintf(data, "rxq%d: %s", qidx, nicvf_queue_stats[stats].name); @@ -149,7 +151,7 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) } } - for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) { + for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) { for (stats = 0; stats < nicvf_n_queue_stats; stats++) { sprintf(data, "txq%d: %s", qidx, nicvf_queue_stats[stats].name); @@ -170,12 +172,14 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) static int nicvf_get_sset_count(struct net_device *netdev, int sset) { + struct nicvf *nic = netdev_priv(netdev); + if (sset != ETH_SS_STATS) return -EINVAL; return nicvf_n_hw_stats + nicvf_n_drv_stats + (nicvf_n_queue_stats * - (MAX_RCV_QUEUES_PER_QS + MAX_SND_QUEUES_PER_QS)) + + (nic->qs->rq_cnt + nic->qs->sq_cnt)) + BGX_RX_STATS_COUNT + BGX_TX_STATS_COUNT; } @@ -197,13 +201,13 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev, *(data++) = ((u64 *)&nic->drv_stats) [nicvf_drv_stats[stat].index]; - for (qidx = 0; qidx < MAX_RCV_QUEUES_PER_QS; qidx++) { + for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) { for (stat = 0; stat < nicvf_n_queue_stats; stat++) *(data++) = ((u64 *)&nic->qs->rq[qidx].stats) [nicvf_queue_stats[stat].index]; } - for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) { + for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) { for (stat = 0; stat < nicvf_n_queue_stats; stat++) *(data++) = ((u64 *)&nic->qs->sq[qidx].stats) [nicvf_queue_stats[stat].index]; @@ -543,6 +547,7 @@ static int nicvf_set_channels(struct net_device *dev, { struct nicvf *nic = netdev_priv(dev); int err = 0; + bool if_up = netif_running(dev); if (!channel->rx_count || !channel->tx_count) return -EINVAL; @@ -551,6 +556,9 @@ static int nicvf_set_channels(struct net_device *dev, if (channel->tx_count > MAX_SND_QUEUES_PER_QS) return -EINVAL; + if (if_up) + nicvf_stop(dev); + nic->qs->rq_cnt = channel->rx_count; nic->qs->sq_cnt = channel->tx_count; nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt); @@ -559,11 +567,9 @@ static int nicvf_set_channels(struct net_device *dev, if (err) return err; - if (!netif_running(dev)) - return err; + if (if_up) + nicvf_open(dev); - nicvf_stop(dev); - nicvf_open(dev); netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", nic->qs->sq_cnt, nic->qs->rq_cnt); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 8b119a035b7e..3b90afb8c293 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -234,7 +234,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic) nic->duplex == DUPLEX_FULL ? "Full duplex" : "Half duplex"); netif_carrier_on(nic->netdev); - netif_tx_wake_all_queues(nic->netdev); + netif_tx_start_all_queues(nic->netdev); } else { netdev_info(nic->netdev, "%s: Link is Down\n", nic->netdev->name); @@ -425,6 +425,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, if (skb) { prefetch(skb); dev_consume_skb_any(skb); + sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; } } @@ -476,12 +477,13 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct napi_struct *napi, int budget) { - int processed_cqe, work_done = 0; + int processed_cqe, work_done = 0, tx_done = 0; int cqe_count, cqe_head; struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; + struct netdev_queue *txq; spin_lock_bh(&cq->lock); loop: @@ -496,8 +498,8 @@ loop: cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; cqe_head &= 0xFFFF; - netdev_dbg(nic->netdev, "%s cqe_count %d cqe_head %d\n", - __func__, cqe_count, cqe_head); + netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n", + __func__, cq_idx, cqe_count, cqe_head); while (processed_cqe < cqe_count) { /* Get the CQ descriptor */ cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); @@ -511,8 +513,8 @@ loop: break; } - netdev_dbg(nic->netdev, "cq_desc->cqe_type %d\n", - cq_desc->cqe_type); + netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n", + cq_idx, cq_desc->cqe_type); switch (cq_desc->cqe_type) { case CQE_TYPE_RX: nicvf_rcv_pkt_handler(netdev, napi, cq, @@ -522,6 +524,7 @@ loop: case CQE_TYPE_SEND: nicvf_snd_pkt_handler(netdev, cq, (void *)cq_desc, CQE_TYPE_SEND); + tx_done++; break; case CQE_TYPE_INVALID: case CQE_TYPE_RX_SPLIT: @@ -532,8 +535,9 @@ loop: } processed_cqe++; } - netdev_dbg(nic->netdev, "%s processed_cqe %d work_done %d budget %d\n", - __func__, processed_cqe, work_done, budget); + netdev_dbg(nic->netdev, + "%s CQ%d processed_cqe %d work_done %d budget %d\n", + __func__, cq_idx, processed_cqe, work_done, budget); /* Ring doorbell to inform H/W to reuse processed CQEs */ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, @@ -543,6 +547,19 @@ loop: goto loop; done: + /* Wakeup TXQ if its stopped earlier due to SQ full */ + if (tx_done) { + txq = netdev_get_tx_queue(netdev, cq_idx); + if (netif_tx_queue_stopped(txq)) { + netif_tx_start_queue(txq); + nic->drv_stats.txq_wake++; + if (netif_msg_tx_err(nic)) + netdev_warn(netdev, + "%s: Transmit queue wakeup SQ%d\n", + netdev->name, cq_idx); + } + } + spin_unlock_bh(&cq->lock); return work_done; } @@ -554,15 +571,10 @@ static int nicvf_poll(struct napi_struct *napi, int budget) struct net_device *netdev = napi->dev; struct nicvf *nic = netdev_priv(netdev); struct nicvf_cq_poll *cq; - struct netdev_queue *txq; cq = container_of(napi, struct nicvf_cq_poll, napi); work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget); - txq = netdev_get_tx_queue(netdev, cq->cq_idx); - if (netif_tx_queue_stopped(txq)) - netif_tx_wake_queue(txq); - if (work_done < budget) { /* Slow packet rate, exit polling */ napi_complete(napi); @@ -833,9 +845,9 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - if (!nicvf_sq_append_skb(nic, skb) && !netif_tx_queue_stopped(txq)) { + if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) { netif_tx_stop_queue(txq); - nic->drv_stats.tx_busy++; + nic->drv_stats.txq_stop++; if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit ring full, stopping SQ%d\n", @@ -859,7 +871,6 @@ int nicvf_stop(struct net_device *netdev) nicvf_send_msg_to_pf(nic, &mbx); netif_carrier_off(netdev); - netif_tx_disable(netdev); /* Disable RBDR & QS error interrupts */ for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { @@ -894,6 +905,8 @@ int nicvf_stop(struct net_device *netdev) kfree(cq_poll); } + netif_tx_disable(netdev); + /* Free resources */ nicvf_config_data_transfer(nic, false); @@ -988,6 +1001,9 @@ int nicvf_open(struct net_device *netdev) for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); + nic->drv_stats.txq_stop = 0; + nic->drv_stats.txq_wake = 0; + netif_carrier_on(netdev); netif_tx_start_all_queues(netdev); @@ -1278,6 +1294,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = netdev->features; netdev->netdev_ops = &nicvf_netdev_ops; + netdev->watchdog_timeo = NICVF_TX_TIMEOUT; INIT_WORK(&nic->reset_task, nicvf_reset_task); @@ -1318,11 +1335,17 @@ static void nicvf_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static void nicvf_shutdown(struct pci_dev *pdev) +{ + nicvf_remove(pdev); +} + static struct pci_driver nicvf_driver = { .name = DRV_NAME, .id_table = nicvf_id_table, .probe = nicvf_probe, .remove = nicvf_remove, + .shutdown = nicvf_shutdown, }; static int __init nicvf_init_module(void) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index d69d228d11a0..ca4240aa6d15 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -103,9 +103,11 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, /* Allocate a new page */ if (!nic->rb_page) { - nic->rb_page = alloc_pages(gfp | __GFP_COMP, order); + nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, + order); if (!nic->rb_page) { - netdev_err(nic->netdev, "Failed to allocate new rcv buffer\n"); + netdev_err(nic->netdev, + "Failed to allocate new rcv buffer\n"); return -ENOMEM; } nic->rb_page_offset = 0; @@ -382,7 +384,8 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) return; if (sq->tso_hdrs) - dma_free_coherent(&nic->pdev->dev, sq->dmem.q_len, + dma_free_coherent(&nic->pdev->dev, + sq->dmem.q_len * TSO_HEADER_SIZE, sq->tso_hdrs, sq->tso_hdrs_phys); kfree(sq->skbuff); @@ -863,10 +866,11 @@ void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, continue; } skb = (struct sk_buff *)sq->skbuff[sq->head]; + if (skb) + dev_kfree_skb_any(skb); atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets); atomic64_add(hdr->tot_len, (atomic64_t *)&netdev->stats.tx_bytes); - dev_kfree_skb_any(skb); nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); } } @@ -992,7 +996,7 @@ static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, memset(gather, 0, SND_QUEUE_DESC_SIZE); gather->subdesc_type = SQ_DESC_TYPE_GATHER; - gather->ld_type = NIC_SEND_LD_TYPE_E_LDWB; + gather->ld_type = NIC_SEND_LD_TYPE_E_LDD; gather->size = size; gather->addr = data; } @@ -1048,7 +1052,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, } nicvf_sq_add_hdr_subdesc(sq, hdr_qentry, seg_subdescs - 1, skb, seg_len); - sq->skbuff[hdr_qentry] = 0; + sq->skbuff[hdr_qentry] = (u64)NULL; qentry = nicvf_get_nxt_sqentry(sq, qentry); desc_cnt += seg_subdescs; @@ -1062,6 +1066,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, /* Inform HW to xmit all TSO segments */ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, skb_get_queue_mapping(skb), desc_cnt); + nic->drv_stats.tx_tso++; return 1; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 8341bdf755d1..f0937b7bfe9f 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -62,7 +62,7 @@ #define SND_QUEUE_CNT 8 #define CMP_QUEUE_CNT 8 /* Max of RCV and SND qcount */ -#define SND_QSIZE SND_QUEUE_SIZE4 +#define SND_QSIZE SND_QUEUE_SIZE2 #define SND_QUEUE_LEN (1ULL << (SND_QSIZE + 10)) #define MAX_SND_QUEUE_LEN (1ULL << (SND_QUEUE_SIZE6 + 10)) #define SND_QUEUE_THRESH 2ULL @@ -70,7 +70,10 @@ /* Since timestamp not enabled, otherwise 2 */ #define MAX_CQE_PER_PKT_XMIT 1 -#define CMP_QSIZE CMP_QUEUE_SIZE4 +/* Keep CQ and SQ sizes same, if timestamping + * is enabled this equation will change. + */ +#define CMP_QSIZE CMP_QUEUE_SIZE2 #define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10)) #define CMP_QUEUE_CQE_THRESH 0 #define CMP_QUEUE_TIMER_THRESH 220 /* 10usec */ @@ -87,7 +90,12 @@ #define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \ MAX_CQE_PER_PKT_XMIT) -#define RQ_CQ_DROP ((CMP_QUEUE_LEN - MAX_CQES_FOR_TX) / 256) +/* Calculate number of CQEs to reserve for all SQEs. + * Its 1/256th level of CQ size. + * '+ 1' to account for pipelining + */ +#define RQ_CQ_DROP ((256 / (CMP_QUEUE_LEN / \ + (CMP_QUEUE_LEN - MAX_CQES_FOR_TX))) + 1) /* Descriptor size in bytes */ #define SND_QUEUE_DESC_SIZE 16 diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 633ec05dfe05..b961a89dc626 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -673,7 +673,10 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg); bgx_flush_dmac_addrs(bgx, lmacid); - if (lmac->phydev) + if ((bgx->lmac_type != BGX_MODE_XFI) && + (bgx->lmac_type != BGX_MODE_XLAUI) && + (bgx->lmac_type != BGX_MODE_40G_KR) && + (bgx->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) phy_disconnect(lmac->phydev); lmac->phydev = NULL; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 1eee73cccdf5..99d33e2d35e6 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -562,6 +562,7 @@ struct fec_enet_private { }; void fec_ptp_init(struct platform_device *pdev); +void fec_ptp_stop(struct platform_device *pdev); void fec_ptp_start_cyclecounter(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr); int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1f89c59b4353..32e3807c650e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -24,6 +24,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> +#include <linux/pm_runtime.h> #include <linux/ptrace.h> #include <linux/errno.h> #include <linux/ioport.h> @@ -77,6 +78,7 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define FEC_ENET_RAEM_V 0x8 #define FEC_ENET_RAFL_V 0x8 #define FEC_ENET_OPD_V 0xFFF0 +#define FEC_MDIO_PM_TIMEOUT 100 /* ms */ static struct platform_device_id fec_devtype[] = { { @@ -1767,7 +1769,13 @@ static void fec_enet_adjust_link(struct net_device *ndev) static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1783,18 +1791,30 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO read timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } - /* return value */ - return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + +out: + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1811,10 +1831,13 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO write timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; } - return 0; + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_clk_enable(struct net_device *ndev, bool enable) @@ -1826,9 +1849,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) ret = clk_prepare_enable(fep->clk_ahb); if (ret) return ret; - ret = clk_prepare_enable(fep->clk_ipg); - if (ret) - goto failed_clk_ipg; if (fep->clk_enet_out) { ret = clk_prepare_enable(fep->clk_enet_out); if (ret) @@ -1852,7 +1872,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) } } else { clk_disable_unprepare(fep->clk_ahb); - clk_disable_unprepare(fep->clk_ipg); if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { @@ -1874,8 +1893,6 @@ failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ipg); -failed_clk_ipg: clk_disable_unprepare(fep->clk_ahb); return ret; @@ -2847,10 +2864,14 @@ fec_enet_open(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); int ret; + ret = pm_runtime_get_sync(&fep->pdev->dev); + if (IS_ERR_VALUE(ret)) + return ret; + pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) - return ret; + goto clk_enable; /* I should reset the ring buffers here, but I don't yet know * a simple way to do that. @@ -2881,6 +2902,9 @@ err_enet_mii_probe: fec_enet_free_buffers(ndev); err_enet_alloc: fec_enet_clk_enable(ndev, false); +clk_enable: + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); pinctrl_pm_select_sleep_state(&fep->pdev->dev); return ret; } @@ -2903,6 +2927,9 @@ fec_enet_close(struct net_device *ndev) fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&fep->pdev->dev); + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); + fec_enet_free_buffers(ndev); return 0; @@ -3115,8 +3142,8 @@ static int fec_enet_init(struct net_device *ndev) fep->bufdesc_size; /* Allocate memory for buffer descriptors. */ - cbd_base = dma_alloc_coherent(NULL, bd_size, &bd_dma, - GFP_KERNEL); + cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma, + GFP_KERNEL); if (!cbd_base) { return -ENOMEM; } @@ -3388,6 +3415,10 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_clk; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { ret = regulator_enable(fep->reg_phy); @@ -3400,6 +3431,11 @@ fec_probe(struct platform_device *pdev) fep->reg_phy = NULL; } + pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + fec_reset_phy(pdev); if (fep->bufdesc_ex) @@ -3447,6 +3483,10 @@ fec_probe(struct platform_device *pdev) fep->rx_copybreak = COPYBREAK_DEFAULT; INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work); + + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + return 0; failed_register: @@ -3454,9 +3494,12 @@ failed_register: failed_mii_init: failed_irq: failed_init: + fec_ptp_stop(pdev); if (fep->reg_phy) regulator_disable(fep->reg_phy); failed_regulator: + clk_disable_unprepare(fep->clk_ipg); +failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: failed_phy: @@ -3473,14 +3516,12 @@ fec_drv_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); - cancel_delayed_work_sync(&fep->time_keep); cancel_work_sync(&fep->tx_timeout_work); + fec_ptp_stop(pdev); unregister_netdev(ndev); fec_enet_mii_remove(fep); if (fep->reg_phy) regulator_disable(fep->reg_phy); - if (fep->ptp_clock) - ptp_clock_unregister(fep->ptp_clock); of_node_put(fep->phy_node); free_netdev(ndev); @@ -3568,7 +3609,28 @@ failed_clk: return ret; } -static SIMPLE_DEV_PM_OPS(fec_pm_ops, fec_suspend, fec_resume); +static int __maybe_unused fec_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + clk_disable_unprepare(fep->clk_ipg); + + return 0; +} + +static int __maybe_unused fec_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + return clk_prepare_enable(fep->clk_ipg); +} + +static const struct dev_pm_ops fec_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(fec_suspend, fec_resume) + SET_RUNTIME_PM_OPS(fec_runtime_suspend, fec_runtime_resume, NULL) +}; static struct platform_driver fec_driver = { .driver = { diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index a15663ad7f5e..f457a23d0bfb 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -604,6 +604,16 @@ void fec_ptp_init(struct platform_device *pdev) schedule_delayed_work(&fep->time_keep, HZ); } +void fec_ptp_stop(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct fec_enet_private *fep = netdev_priv(ndev); + + cancel_delayed_work_sync(&fep->time_keep); + if (fep->ptp_clock) + ptp_clock_unregister(fep->ptp_clock); +} + /** * fec_ptp_check_pps_event * @fep: the fec_enet_private structure handle diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index ff875028fdff..2b7610f341b0 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -565,22 +565,6 @@ static void gfar_ints_enable(struct gfar_private *priv) } } -static void lock_tx_qs(struct gfar_private *priv) -{ - int i; - - for (i = 0; i < priv->num_tx_queues; i++) - spin_lock(&priv->tx_queue[i]->txlock); -} - -static void unlock_tx_qs(struct gfar_private *priv) -{ - int i; - - for (i = 0; i < priv->num_tx_queues; i++) - spin_unlock(&priv->tx_queue[i]->txlock); -} - static int gfar_alloc_tx_queues(struct gfar_private *priv) { int i; @@ -1376,7 +1360,6 @@ static int gfar_probe(struct platform_device *ofdev) priv->dev = &ofdev->dev; SET_NETDEV_DEV(dev, &ofdev->dev); - spin_lock_init(&priv->bflock); INIT_WORK(&priv->reset_task, gfar_reset_task); platform_set_drvdata(ofdev, priv); @@ -1470,9 +1453,8 @@ static int gfar_probe(struct platform_device *ofdev) goto register_fail; } - device_init_wakeup(&dev->dev, - priv->device_flags & - FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); + device_set_wakeup_capable(&dev->dev, priv->device_flags & + FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); /* fill out IRQ number and name fields */ for (i = 0; i < priv->num_grps; i++) { @@ -1540,48 +1522,37 @@ static int gfar_suspend(struct device *dev) struct gfar_private *priv = dev_get_drvdata(dev); struct net_device *ndev = priv->ndev; struct gfar __iomem *regs = priv->gfargrp[0].regs; - unsigned long flags; u32 tempval; - int magic_packet = priv->wol_en && (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); + if (!netif_running(ndev)) + return 0; + + disable_napi(priv); + netif_tx_lock(ndev); netif_device_detach(ndev); + netif_tx_unlock(ndev); - if (netif_running(ndev)) { + gfar_halt(priv); - local_irq_save(flags); - lock_tx_qs(priv); + if (magic_packet) { + /* Enable interrupt on Magic Packet */ + gfar_write(®s->imask, IMASK_MAG); - gfar_halt_nodisable(priv); + /* Enable Magic Packet mode */ + tempval = gfar_read(®s->maccfg2); + tempval |= MACCFG2_MPEN; + gfar_write(®s->maccfg2, tempval); - /* Disable Tx, and Rx if wake-on-LAN is disabled. */ + /* re-enable the Rx block */ tempval = gfar_read(®s->maccfg1); - - tempval &= ~MACCFG1_TX_EN; - - if (!magic_packet) - tempval &= ~MACCFG1_RX_EN; - + tempval |= MACCFG1_RX_EN; gfar_write(®s->maccfg1, tempval); - unlock_tx_qs(priv); - local_irq_restore(flags); - - disable_napi(priv); - - if (magic_packet) { - /* Enable interrupt on Magic Packet */ - gfar_write(®s->imask, IMASK_MAG); - - /* Enable Magic Packet mode */ - tempval = gfar_read(®s->maccfg2); - tempval |= MACCFG2_MPEN; - gfar_write(®s->maccfg2, tempval); - } else { - phy_stop(priv->phydev); - } + } else { + phy_stop(priv->phydev); } return 0; @@ -1592,37 +1563,26 @@ static int gfar_resume(struct device *dev) struct gfar_private *priv = dev_get_drvdata(dev); struct net_device *ndev = priv->ndev; struct gfar __iomem *regs = priv->gfargrp[0].regs; - unsigned long flags; u32 tempval; int magic_packet = priv->wol_en && (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); - if (!netif_running(ndev)) { - netif_device_attach(ndev); + if (!netif_running(ndev)) return 0; - } - if (!magic_packet && priv->phydev) + if (magic_packet) { + /* Disable Magic Packet mode */ + tempval = gfar_read(®s->maccfg2); + tempval &= ~MACCFG2_MPEN; + gfar_write(®s->maccfg2, tempval); + } else { phy_start(priv->phydev); - - /* Disable Magic Packet mode, in case something - * else woke us up. - */ - local_irq_save(flags); - lock_tx_qs(priv); - - tempval = gfar_read(®s->maccfg2); - tempval &= ~MACCFG2_MPEN; - gfar_write(®s->maccfg2, tempval); + } gfar_start(priv); - unlock_tx_qs(priv); - local_irq_restore(flags); - netif_device_attach(ndev); - enable_napi(priv); return 0; @@ -2045,7 +2005,8 @@ static int register_grp_irqs(struct gfar_priv_grp *grp) /* Install our interrupt handlers for Error, * Transmit, and Receive */ - err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0, + err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, + IRQF_NO_SUSPEND, gfar_irq(grp, ER)->name, grp); if (err < 0) { netif_err(priv, intr, dev, "Can't get IRQ %d\n", @@ -2068,7 +2029,8 @@ static int register_grp_irqs(struct gfar_priv_grp *grp) goto rx_irq_fail; } } else { - err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0, + err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, + IRQF_NO_SUSPEND, gfar_irq(grp, TX)->name, grp); if (err < 0) { netif_err(priv, intr, dev, "Can't get IRQ %d\n", @@ -2169,8 +2131,6 @@ static int gfar_enet_open(struct net_device *dev) if (err) return err; - device_set_wakeup_enable(&dev->dev, priv->wol_en); - return err; } diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index daa1d37de642..5545e4103368 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -1145,9 +1145,6 @@ struct gfar_private { int oldduplex; int oldlink; - /* Bitfield update lock */ - spinlock_t bflock; - uint32_t msg_enable; struct work_struct reset_task; diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index fda12fb32ec7..3c0a8f825b63 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -653,7 +653,6 @@ static void gfar_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct gfar_private *priv = netdev_priv(dev); - unsigned long flags; if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) && wol->wolopts != 0) @@ -664,9 +663,7 @@ static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) device_set_wakeup_enable(&dev->dev, wol->wolopts & WAKE_MAGIC); - spin_lock_irqsave(&priv->bflock, flags); - priv->wol_en = !!device_may_wakeup(&dev->dev); - spin_unlock_irqrestore(&priv->bflock, flags); + priv->wol_en = !!device_may_wakeup(&dev->dev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 82040137d7d9..0a3202047569 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -686,6 +686,7 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; + long ret_wait; int err = 0; down(&cmd->event_sem); @@ -711,8 +712,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (err) goto out_reset; - if (!wait_for_completion_timeout(&context->done, - msecs_to_jiffies(timeout))) { + if (op == MLX4_CMD_SENSE_PORT) { + ret_wait = + wait_for_completion_interruptible_timeout(&context->done, + msecs_to_jiffies(timeout)); + if (ret_wait < 0) { + context->fw_status = 0; + context->out_param = 0; + context->result = 0; + } + } else { + ret_wait = (long)wait_for_completion_timeout(&context->done, + msecs_to_jiffies(timeout)); + } + if (!ret_wait) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); if (op == MLX4_CMD_NOP) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 7a4f20bb7fcb..9c145dddd717 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -246,7 +246,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring) { - BUG_ON((u32)(ring->prod - ring->cons) > ring->actual_size); return ring->prod == ring->cons; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index aae13adfb492..8e81e53c370e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -601,7 +601,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) continue; mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN to slave: %d, port:%d\n", __func__, i, port); - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { eqe->event.port_change.port = cpu_to_be32( @@ -640,7 +640,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) continue; if (i == mlx4_master_func_num(dev)) continue; - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { eqe->event.port_change.port = cpu_to_be32( diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12fbfcb44d8a..29c2a017a450 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2273,6 +2273,11 @@ static int mlx4_allocate_default_counters(struct mlx4_dev *dev) } else if (err == -ENOENT) { err = 0; continue; + } else if (mlx4_is_slave(dev) && err == -EINVAL) { + priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev); + mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n", + MLX4_SINK_COUNTER_INDEX(dev)); + err = 0; } else { mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n", __func__, port + 1, err); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index 33669c29b341..753ea8bad953 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -1415,7 +1415,7 @@ static int qlcnic_83xx_copy_fw_file(struct qlcnic_adapter *adapter) if (fw->size & 0xF) { addr = dest + size; for (i = 0; i < (fw->size & 0xF); i++) - data[i] = temp[size + i]; + data[i] = ((u8 *)temp)[size + i]; for (; i < 16; i++) data[i] = 0; ret = qlcnic_ms_mem_write128(adapter, addr, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index f3918c7e7eeb..bcdc8955c719 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -413,3 +413,7 @@ static int stmmac_pltfr_resume(struct device *dev) SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, stmmac_pltfr_suspend, stmmac_pltfr_resume); EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops); + +MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet platform support"); +MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 0c5842aeb807..ab6051a43134 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6658,10 +6658,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, struct sk_buff *skb_new; skb_new = skb_realloc_headroom(skb, len); - if (!skb_new) { - rp->tx_errors++; + if (!skb_new) goto out_drop; - } kfree_skb(skb); skb = skb_new; } else diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index bbacf5cccec2..a8a730641bbb 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -223,6 +223,7 @@ void *netcp_device_find_module(struct netcp_device *netcp_device, /* SGMII functions */ int netcp_sgmii_reset(void __iomem *sgmii_ofs, int port); +bool netcp_sgmii_rtreset(void __iomem *sgmii_ofs, int port, bool set); int netcp_sgmii_get_port_link(void __iomem *sgmii_ofs, int port); int netcp_sgmii_config(void __iomem *sgmii_ofs, int port, u32 interface); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index ec8ed30196f3..9749dfd78c43 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2112,6 +2112,7 @@ probe_quit: static int netcp_remove(struct platform_device *pdev) { struct netcp_device *netcp_device = platform_get_drvdata(pdev); + struct netcp_intf *netcp_intf, *netcp_tmp; struct netcp_inst_modpriv *inst_modpriv, *tmp; struct netcp_module *module; @@ -2123,10 +2124,17 @@ static int netcp_remove(struct platform_device *pdev) list_del(&inst_modpriv->inst_list); kfree(inst_modpriv); } - WARN(!list_empty(&netcp_device->interface_head), "%s interface list not empty!\n", - pdev->name); - devm_kfree(&pdev->dev, netcp_device); + /* now that all modules are removed, clean up the interfaces */ + list_for_each_entry_safe(netcp_intf, netcp_tmp, + &netcp_device->interface_head, + interface_list) { + netcp_delete_interface(netcp_device, netcp_intf->ndev); + } + + WARN(!list_empty(&netcp_device->interface_head), + "%s interface list not empty!\n", pdev->name); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); platform_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 9b7e0a34c98b..1974a8ae764a 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -1901,11 +1901,28 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave, writel(slave->mac_control, GBE_REG_ADDR(slave, emac_regs, mac_control)); } +static void gbe_sgmii_rtreset(struct gbe_priv *priv, + struct gbe_slave *slave, bool set) +{ + void __iomem *sgmii_port_regs; + + if (SLAVE_LINK_IS_XGMII(slave)) + return; + + if ((priv->ss_version == GBE_SS_VERSION_14) && (slave->slave_num >= 2)) + sgmii_port_regs = priv->sgmii_port34_regs; + else + sgmii_port_regs = priv->sgmii_port_regs; + + netcp_sgmii_rtreset(sgmii_port_regs, slave->slave_num, set); +} + static void gbe_slave_stop(struct gbe_intf *intf) { struct gbe_priv *gbe_dev = intf->gbe_dev; struct gbe_slave *slave = intf->slave; + gbe_sgmii_rtreset(gbe_dev, slave, true); gbe_port_reset(slave); /* Disable forwarding */ cpsw_ale_control_set(gbe_dev->ale, slave->port_num, @@ -1947,6 +1964,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) gbe_sgmii_config(priv, slave); gbe_port_reset(slave); + gbe_sgmii_rtreset(priv, slave, false); gbe_port_config(priv, slave, priv->rx_packet_max); gbe_set_slave_mac(slave, gbe_intf); /* enable forwarding */ @@ -2490,10 +2508,9 @@ static void free_secondary_ports(struct gbe_priv *gbe_dev) { struct gbe_slave *slave; - for (;;) { + while (!list_empty(&gbe_dev->secondary_slaves)) { slave = first_sec_slave(gbe_dev); - if (!slave) - break; + if (slave->phy) phy_disconnect(slave->phy); list_del(&slave->slave_list); @@ -2839,14 +2856,13 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, &gbe_dev->dma_chan_name); if (ret < 0) { dev_err(dev, "missing \"tx-channel\" parameter\n"); - ret = -ENODEV; - goto quit; + return -EINVAL; } if (!strcmp(node->name, "gbe")) { ret = get_gbe_resource_version(gbe_dev, node); if (ret) - goto quit; + return ret; dev_dbg(dev, "ss_version: 0x%08x\n", gbe_dev->ss_version); @@ -2857,22 +2873,20 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, else ret = -ENODEV; - if (ret) - goto quit; } else if (!strcmp(node->name, "xgbe")) { ret = set_xgbe_ethss10_priv(gbe_dev, node); if (ret) - goto quit; + return ret; ret = netcp_xgbe_serdes_init(gbe_dev->xgbe_serdes_regs, gbe_dev->ss_regs); - if (ret) - goto quit; } else { dev_err(dev, "unknown GBE node(%s)\n", node->name); ret = -ENODEV; - goto quit; } + if (ret) + return ret; + interfaces = of_get_child_by_name(node, "interfaces"); if (!interfaces) dev_err(dev, "could not find interfaces\n"); @@ -2880,11 +2894,11 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, ret = netcp_txpipe_init(&gbe_dev->tx_pipe, netcp_device, gbe_dev->dma_chan_name, gbe_dev->tx_queue_id); if (ret) - goto quit; + return ret; ret = netcp_txpipe_open(&gbe_dev->tx_pipe); if (ret) - goto quit; + return ret; /* Create network interfaces */ INIT_LIST_HEAD(&gbe_dev->gbe_intf_head); @@ -2899,6 +2913,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, if (gbe_dev->num_slaves >= gbe_dev->max_num_slaves) break; } + of_node_put(interfaces); if (!gbe_dev->num_slaves) dev_warn(dev, "No network interface configured\n"); @@ -2911,9 +2926,10 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, of_node_put(secondary_ports); if (!gbe_dev->num_slaves) { - dev_err(dev, "No network interface or secondary ports configured\n"); + dev_err(dev, + "No network interface or secondary ports configured\n"); ret = -ENODEV; - goto quit; + goto free_sec_ports; } memset(&ale_params, 0, sizeof(ale_params)); @@ -2927,7 +2943,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, if (!gbe_dev->ale) { dev_err(gbe_dev->dev, "error initializing ale engine\n"); ret = -ENODEV; - goto quit; + goto free_sec_ports; } else { dev_dbg(gbe_dev->dev, "Created a gbe ale engine\n"); } @@ -2943,14 +2959,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, *inst_priv = gbe_dev; return 0; -quit: - if (gbe_dev->hw_stats) - devm_kfree(dev, gbe_dev->hw_stats); - cpsw_ale_destroy(gbe_dev->ale); - if (gbe_dev->ss_regs) - devm_iounmap(dev, gbe_dev->ss_regs); - of_node_put(interfaces); - devm_kfree(dev, gbe_dev); +free_sec_ports: + free_secondary_ports(gbe_dev); return ret; } @@ -3023,12 +3033,9 @@ static int gbe_remove(struct netcp_device *netcp_device, void *inst_priv) free_secondary_ports(gbe_dev); if (!list_empty(&gbe_dev->gbe_intf_head)) - dev_alert(gbe_dev->dev, "unreleased ethss interfaces present\n"); + dev_alert(gbe_dev->dev, + "unreleased ethss interfaces present\n"); - devm_kfree(gbe_dev->dev, gbe_dev->hw_stats); - devm_iounmap(gbe_dev->dev, gbe_dev->ss_regs); - memset(gbe_dev, 0x00, sizeof(*gbe_dev)); - devm_kfree(gbe_dev->dev, gbe_dev); return 0; } diff --git a/drivers/net/ethernet/ti/netcp_sgmii.c b/drivers/net/ethernet/ti/netcp_sgmii.c index dbeb14266e2f..5d8419f658d0 100644 --- a/drivers/net/ethernet/ti/netcp_sgmii.c +++ b/drivers/net/ethernet/ti/netcp_sgmii.c @@ -18,6 +18,9 @@ #include "netcp.h" +#define SGMII_SRESET_RESET BIT(0) +#define SGMII_SRESET_RTRESET BIT(1) + #define SGMII_REG_STATUS_LOCK BIT(4) #define SGMII_REG_STATUS_LINK BIT(0) #define SGMII_REG_STATUS_AUTONEG BIT(2) @@ -51,12 +54,35 @@ static void sgmii_write_reg_bit(void __iomem *base, int reg, u32 val) int netcp_sgmii_reset(void __iomem *sgmii_ofs, int port) { /* Soft reset */ - sgmii_write_reg_bit(sgmii_ofs, SGMII_SRESET_REG(port), 0x1); - while (sgmii_read_reg(sgmii_ofs, SGMII_SRESET_REG(port)) != 0x0) + sgmii_write_reg_bit(sgmii_ofs, SGMII_SRESET_REG(port), + SGMII_SRESET_RESET); + + while ((sgmii_read_reg(sgmii_ofs, SGMII_SRESET_REG(port)) & + SGMII_SRESET_RESET) != 0x0) ; + return 0; } +/* port is 0 based */ +bool netcp_sgmii_rtreset(void __iomem *sgmii_ofs, int port, bool set) +{ + u32 reg; + bool oldval; + + /* Initiate a soft reset */ + reg = sgmii_read_reg(sgmii_ofs, SGMII_SRESET_REG(port)); + oldval = (reg & SGMII_SRESET_RTRESET) != 0x0; + if (set) + reg |= SGMII_SRESET_RTRESET; + else + reg &= ~SGMII_SRESET_RTRESET; + sgmii_write_reg(sgmii_ofs, SGMII_SRESET_REG(port), reg); + wmb(); + + return oldval; +} + int netcp_sgmii_get_port_link(void __iomem *sgmii_ofs, int port) { u32 status = 0, link = 0; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 3b933bb5a8d5..edd77342773a 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -719,6 +719,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; int copylen = 0; + int depth; bool zerocopy = false; size_t linear; ssize_t n; @@ -804,6 +805,12 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, skb_probe_transport_header(skb, ETH_HLEN); + /* Move network header to the right position for VLAN tagged packets */ + if ((skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) && + __vlan_get_protocol(skb, skb->protocol, &depth) != 0) + skb_set_network_header(skb, depth); + rcu_read_lock(); vlan = rcu_dereference(q->vlan); /* copy skb_ubuf_info for callback when skb has no error */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7f6419ebb5e1..ad8cbc6c9ee7 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -27,7 +27,7 @@ #include <linux/usb/cdc.h> /* Version Information */ -#define DRIVER_VERSION "v1.08.0 (2015/01/13)" +#define DRIVER_VERSION "v1.08.1 (2015/07/28)" #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>" #define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters" #define MODULENAME "r8152" @@ -1902,11 +1902,10 @@ static void rtl_drop_queued_tx(struct r8152 *tp) static void rtl8152_tx_timeout(struct net_device *netdev) { struct r8152 *tp = netdev_priv(netdev); - int i; netif_warn(tp, tx_err, netdev, "Tx timeout\n"); - for (i = 0; i < RTL8152_MAX_TX; i++) - usb_unlink_urb(tp->tx_info[i].urb); + + usb_queue_reset_device(tp->intf); } static void rtl8152_set_rx_mode(struct net_device *netdev) @@ -2075,7 +2074,6 @@ static int rtl_start_rx(struct r8152 *tp) { int i, ret = 0; - napi_disable(&tp->napi); INIT_LIST_HEAD(&tp->rx_done); for (i = 0; i < RTL8152_MAX_RX; i++) { INIT_LIST_HEAD(&tp->rx_info[i].list); @@ -2083,7 +2081,6 @@ static int rtl_start_rx(struct r8152 *tp) if (ret) break; } - napi_enable(&tp->napi); if (ret && ++i < RTL8152_MAX_RX) { struct list_head rx_queue; @@ -2166,6 +2163,7 @@ static int rtl8153_enable(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return -ENODEV; + usb_disable_lpm(tp->udev); set_tx_qlen(tp); rtl_set_eee_plus(tp); r8153_set_rx_early_timeout(tp); @@ -2337,11 +2335,61 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts) device_set_wakeup_enable(&tp->udev->dev, false); } +static void r8153_u1u2en(struct r8152 *tp, bool enable) +{ + u8 u1u2[8]; + + if (enable) + memset(u1u2, 0xff, sizeof(u1u2)); + else + memset(u1u2, 0x00, sizeof(u1u2)); + + usb_ocp_write(tp, USB_TOLERANCE, BYTE_EN_SIX_BYTES, sizeof(u1u2), u1u2); +} + +static void r8153_u2p3en(struct r8152 *tp, bool enable) +{ + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL); + if (enable && tp->version != RTL_VER_03 && tp->version != RTL_VER_04) + ocp_data |= U2P3_ENABLE; + else + ocp_data &= ~U2P3_ENABLE; + ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data); +} + +static void r8153_power_cut_en(struct r8152 *tp, bool enable) +{ + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_POWER_CUT); + if (enable) + ocp_data |= PWR_EN | PHASE2_EN; + else + ocp_data &= ~(PWR_EN | PHASE2_EN); + ocp_write_word(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); + ocp_data &= ~PCUT_STATUS; + ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); +} + +static bool rtl_can_wakeup(struct r8152 *tp) +{ + struct usb_device *udev = tp->udev; + + return (udev->actconfig->desc.bmAttributes & USB_CONFIG_ATT_WAKEUP); +} + static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) { if (enable) { u32 ocp_data; + r8153_u1u2en(tp, false); + r8153_u2p3en(tp, false); + __rtl_set_wol(tp, WAKE_ANY); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); @@ -2353,6 +2401,8 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); } else { __rtl_set_wol(tp, tp->saved_wolopts); + r8153_u2p3en(tp, true); + r8153_u1u2en(tp, true); } } @@ -2599,46 +2649,6 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) set_bit(PHY_RESET, &tp->flags); } -static void r8153_u1u2en(struct r8152 *tp, bool enable) -{ - u8 u1u2[8]; - - if (enable) - memset(u1u2, 0xff, sizeof(u1u2)); - else - memset(u1u2, 0x00, sizeof(u1u2)); - - usb_ocp_write(tp, USB_TOLERANCE, BYTE_EN_SIX_BYTES, sizeof(u1u2), u1u2); -} - -static void r8153_u2p3en(struct r8152 *tp, bool enable) -{ - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL); - if (enable) - ocp_data |= U2P3_ENABLE; - else - ocp_data &= ~U2P3_ENABLE; - ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data); -} - -static void r8153_power_cut_en(struct r8152 *tp, bool enable) -{ - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_POWER_CUT); - if (enable) - ocp_data |= PWR_EN | PHASE2_EN; - else - ocp_data &= ~(PWR_EN | PHASE2_EN); - ocp_write_word(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - ocp_data &= ~PCUT_STATUS; - ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); -} - static void r8153_first_init(struct r8152 *tp) { u32 ocp_data; @@ -2781,6 +2791,7 @@ static void rtl8153_disable(struct r8152 *tp) r8153_disable_aldps(tp); rtl_disable(tp); r8153_enable_aldps(tp); + usb_enable_lpm(tp->udev); } static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex) @@ -2901,9 +2912,13 @@ static void rtl8153_up(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; + r8153_u1u2en(tp, false); r8153_disable_aldps(tp); r8153_first_init(tp); r8153_enable_aldps(tp); + r8153_u2p3en(tp, true); + r8153_u1u2en(tp, true); + usb_enable_lpm(tp->udev); } static void rtl8153_down(struct r8152 *tp) @@ -2914,6 +2929,7 @@ static void rtl8153_down(struct r8152 *tp) } r8153_u1u2en(tp, false); + r8153_u2p3en(tp, false); r8153_power_cut_en(tp, false); r8153_disable_aldps(tp); r8153_enter_oob(tp); @@ -2932,8 +2948,10 @@ static void set_carrier(struct r8152 *tp) if (!netif_carrier_ok(netdev)) { tp->rtl_ops.enable(tp); set_bit(RTL8152_SET_RX_MODE, &tp->flags); + napi_disable(&tp->napi); netif_carrier_on(netdev); rtl_start_rx(tp); + napi_enable(&tp->napi); } } else { if (netif_carrier_ok(netdev)) { @@ -3252,6 +3270,7 @@ static void r8153_init(struct r8152 *tp) msleep(20); } + usb_disable_lpm(tp->udev); r8153_u2p3en(tp, false); if (tp->version == RTL_VER_04) { @@ -3319,6 +3338,59 @@ static void r8153_init(struct r8152 *tp) r8153_enable_aldps(tp); r8152b_enable_fc(tp); rtl_tally_reset(tp); + r8153_u2p3en(tp, true); +} + +static int rtl8152_pre_reset(struct usb_interface *intf) +{ + struct r8152 *tp = usb_get_intfdata(intf); + struct net_device *netdev; + + if (!tp) + return 0; + + netdev = tp->netdev; + if (!netif_running(netdev)) + return 0; + + napi_disable(&tp->napi); + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); + cancel_delayed_work_sync(&tp->schedule); + if (netif_carrier_ok(netdev)) { + netif_stop_queue(netdev); + mutex_lock(&tp->control); + tp->rtl_ops.disable(tp); + mutex_unlock(&tp->control); + } + + return 0; +} + +static int rtl8152_post_reset(struct usb_interface *intf) +{ + struct r8152 *tp = usb_get_intfdata(intf); + struct net_device *netdev; + + if (!tp) + return 0; + + netdev = tp->netdev; + if (!netif_running(netdev)) + return 0; + + set_bit(WORK_ENABLE, &tp->flags); + if (netif_carrier_ok(netdev)) { + mutex_lock(&tp->control); + tp->rtl_ops.enable(tp); + rtl8152_set_rx_mode(netdev); + mutex_unlock(&tp->control); + netif_wake_queue(netdev); + } + + napi_enable(&tp->napi); + + return 0; } static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) @@ -3374,9 +3446,11 @@ static int rtl8152_resume(struct usb_interface *intf) if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { rtl_runtime_suspend_enable(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); + napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); if (netif_carrier_ok(tp->netdev)) rtl_start_rx(tp); + napi_enable(&tp->napi); } else { tp->rtl_ops.up(tp); rtl8152_set_speed(tp, AUTONEG_ENABLE, @@ -3403,12 +3477,15 @@ static void rtl8152_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (usb_autopm_get_interface(tp->intf) < 0) return; - mutex_lock(&tp->control); - - wol->supported = WAKE_ANY; - wol->wolopts = __rtl_get_wol(tp); - - mutex_unlock(&tp->control); + if (!rtl_can_wakeup(tp)) { + wol->supported = 0; + wol->wolopts = 0; + } else { + mutex_lock(&tp->control); + wol->supported = WAKE_ANY; + wol->wolopts = __rtl_get_wol(tp); + mutex_unlock(&tp->control); + } usb_autopm_put_interface(tp->intf); } @@ -3418,6 +3495,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct r8152 *tp = netdev_priv(dev); int ret; + if (!rtl_can_wakeup(tp)) + return -EOPNOTSUPP; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; @@ -4059,6 +4139,9 @@ static int rtl8152_probe(struct usb_interface *intf, goto out1; } + if (!rtl_can_wakeup(tp)) + __rtl_set_wol(tp, 0); + tp->saved_wolopts = __rtl_get_wol(tp); if (tp->saved_wolopts) device_set_wakeup_enable(&udev->dev, true); @@ -4132,6 +4215,8 @@ static struct usb_driver rtl8152_driver = { .suspend = rtl8152_suspend, .resume = rtl8152_resume, .reset_resume = rtl8152_resume, + .pre_reset = rtl8152_pre_reset, + .post_reset = rtl8152_post_reset, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h index 044a124bfbbc..21b15f6fee25 100644 --- a/include/linux/platform_data/macb.h +++ b/include/linux/platform_data/macb.h @@ -8,11 +8,19 @@ #ifndef __MACB_PDATA_H__ #define __MACB_PDATA_H__ +/** + * struct macb_platform_data - platform data for MACB Ethernet + * @phy_mask: phy mask passed when register the MDIO bus + * within the driver + * @phy_irq_pin: PHY IRQ + * @is_rmii: using RMII interface? + * @rev_eth_addr: reverse Ethernet address byte order + */ struct macb_platform_data { u32 phy_mask; - int phy_irq_pin; /* PHY IRQ */ - u8 is_rmii; /* using RMII interface? */ - u8 rev_eth_addr; /* reverse Ethernet address byte order */ + int phy_irq_pin; + u8 is_rmii; + u8 rev_eth_addr; }; #endif /* __MACB_PDATA_H__ */ diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1b..931738bc5bba 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -99,7 +99,6 @@ struct tc_action_ops { int tcf_hash_search(struct tc_action *a, u32 index); void tcf_hash_destroy(struct tc_action *a); -int tcf_hash_release(struct tc_action *a, int bind); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, @@ -107,6 +106,13 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); + +static inline int tcf_hash_release(struct tc_action *a, bool bind) +{ + return __tcf_hash_release(a, bind, false); +} + int tcf_register_action(struct tc_action_ops *a, unsigned int mask); int tcf_unregister_action(struct tc_action_ops *a); int tcf_action_destroy(struct list_head *actions, int bind); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e1300b3dd597..53eead2da743 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -21,13 +21,11 @@ struct netns_frags { * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction - * @INET_FRAG_EVICTED: frag queue is being evicted */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), - INET_FRAG_EVICTED = BIT(3) }; /** @@ -45,6 +43,7 @@ enum { * @flags: fragment queue flags * @max_size: maximum received fragment size * @net: namespace that this frag belongs to + * @list_evictor: list of queues to forcefully evict (e.g. due to low memory) */ struct inet_frag_queue { spinlock_t lock; @@ -59,6 +58,7 @@ struct inet_frag_queue { __u8 flags; u16 max_size; struct netns_frags *net; + struct hlist_node list_evictor; }; #define INETFRAGS_HASHSZ 1024 @@ -125,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f inet_frag_destroy(q, f); } +static inline bool inet_frag_evicting(struct inet_frag_queue *q) +{ + return !hlist_unhashed(&q->list_evictor); +} + /* Memory Tracking Functions. */ /* The default percpu_counter batch size is not big enough to scale to @@ -139,14 +144,14 @@ static inline int frag_mem_limit(struct netns_frags *nf) return percpu_counter_read(&nf->mem); } -static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch); + __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); } -static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch); + __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); } static inline void init_frag_mem_limit(struct netns_frags *nf) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 49c142bdf01e..5fa643b4e891 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -183,7 +183,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); struct fib_table { struct hlist_node tb_hlist; u32 tb_id; - int tb_default; int tb_num_default; struct rcu_head rcu; unsigned long *tb_data; @@ -290,7 +289,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); -void fib_select_default(struct fib_result *res); +void fib_select_default(const struct flowi4 *flp, struct fib_result *res); #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 095433b8a8b0..37cd3911d5c5 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -291,7 +291,7 @@ extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; void init_nf_conntrack_hash_rnd(void); -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl); +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 29d6a94db54d..723b61c82b3f 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -68,7 +68,6 @@ struct ct_pcpu { spinlock_t lock; struct hlist_nulls_head unconfirmed; struct hlist_nulls_head dying; - struct hlist_nulls_head tmpl; }; struct netns_ct { diff --git a/include/net/sock.h b/include/net/sock.h index 05a8c1aea251..f21f0708ec59 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -902,7 +902,7 @@ void sk_stream_kill_queues(struct sock *sk); void sk_set_memalloc(struct sock *sk); void sk_clear_memalloc(struct sock *sk); -int sk_wait_data(struct sock *sk, long *timeo); +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb); struct request_sock_ops; struct timewait_sock_ops; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3d0f7d2a0616..ad82324f710f 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2312,6 +2312,10 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) return 1; chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 0ff6e1bbca91..fa7bfced888e 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -37,15 +37,30 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) { - if (!is_skb_forwardable(skb->dev, skb)) { - kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); - br_drop_fake_rtable(skb); - skb_sender_cpu_clear(skb); - dev_queue_xmit(skb); + if (!is_skb_forwardable(skb->dev, skb)) + goto drop; + + skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD))) { + int depth; + + if (!__vlan_get_protocol(skb, skb->protocol, &depth)) + goto drop; + + skb_set_network_header(skb, depth); } + dev_queue_xmit(skb); + + return 0; + +drop: + kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 1198a3dbad95..c94321955db7 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -445,6 +445,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (p->port->state == BR_STATE_DISABLED) goto unlock; + entry->state = p->state; rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 79db489cdade..0b39dcc65b94 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1416,8 +1416,7 @@ br_multicast_leave_group(struct net_bridge *br, spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED) || - timer_pending(&other_query->timer)) + (port && port->state == BR_STATE_DISABLED)) goto out; mdb = mlock_dereference(br->mdb, br); @@ -1425,6 +1424,31 @@ br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { + struct net_bridge_port_group __rcu **pp; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port != port) + continue; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB); + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + } + goto out; + } + + if (timer_pending(&other_query->timer)) + goto out; + if (br->multicast_querier) { __br_multicast_send_query(br, port, &mp->addr); @@ -1450,28 +1474,6 @@ br_multicast_leave_group(struct net_bridge *br, } } - if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { - struct net_bridge_port_group __rcu **pp; - - for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; - pp = &p->next) { - if (p->port != port) - continue; - - rcu_assign_pointer(*pp, p->next); - hlist_del_init(&p->mglist); - del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); - br_mdb_notify(br->dev, port, group, RTM_DELMDB); - - if (!mp->ports && !mp->mglist && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); - } - goto out; - } - now = jiffies; time = now + br->multicast_last_member_count * br->multicast_last_member_interval; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 364bdc98bd9b..3da5525eb8a2 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -693,9 +693,17 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *tb[], struct nlattr *data[]) { + struct net_bridge *br = netdev_priv(brdev); + int ret; + if (!data) return 0; - return br_setport(br_port_get_rtnl(dev), data); + + spin_lock_bh(&br->lock); + ret = br_setport(br_port_get_rtnl(dev), data); + spin_unlock_bh(&br->lock); + + return ret; } static int br_port_fill_slave_info(struct sk_buff *skb, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index b4b6dab9c285..ed74ffaa851f 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -209,8 +209,9 @@ void br_transmit_config(struct net_bridge_port *p) br_send_config_bpdu(p, &bpdu); p->topology_change_ack = 0; p->config_pending = 0; - mod_timer(&p->hold_timer, - round_jiffies(jiffies + BR_HOLD_TIME)); + if (p->br->stp_enabled == BR_KERNEL_STP) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); } } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a2730e7196cd..4ca449a16132 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -48,7 +48,8 @@ void br_stp_enable_bridge(struct net_bridge *br) struct net_bridge_port *p; spin_lock_bh(&br->lock); - mod_timer(&br->hello_timer, jiffies + br->hello_time); + if (br->stp_enabled == BR_KERNEL_STP) + mod_timer(&br->hello_timer, jiffies + br->hello_time); mod_timer(&br->gc_timer, jiffies + HZ/10); br_config_bpdu_generation(br); @@ -127,6 +128,7 @@ static void br_stp_start(struct net_bridge *br) int r; char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; + struct net_bridge_port *p; r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); @@ -140,6 +142,10 @@ static void br_stp_start(struct net_bridge *br) if (r == 0) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); + /* Stop hello and hold timers */ + del_timer(&br->hello_timer); + list_for_each_entry(p, &br->port_list, list) + del_timer(&p->hold_timer); } else { br->stp_enabled = BR_KERNEL_STP; br_debug(br, "using kernel STP\n"); @@ -156,12 +162,17 @@ static void br_stp_stop(struct net_bridge *br) int r; char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL }; char *envp[] = { NULL }; + struct net_bridge_port *p; if (br->stp_enabled == BR_USER_STP) { r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); br_info(br, "userspace STP stopped, return code %d\n", r); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); + list_for_each_entry(p, &br->port_list, list) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); spin_lock_bh(&br->lock); br_port_state_selection(br); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7caf7fae2d5b..5f0f5af0ec35 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,9 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); + if (br->stp_enabled != BR_USER_STP) + mod_timer(&br->hello_timer, + round_jiffies(jiffies + br->hello_time)); } spin_unlock(&br->lock); } diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 1f2a126f4ffa..6441f47b1a8f 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -23,7 +23,8 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state struct cgroup_cls_state *task_cls_state(struct task_struct *p) { - return css_cls_state(task_css(p, net_cls_cgrp_id)); + return css_cls_state(task_css_check(p, net_cls_cgrp_id, + rcu_read_lock_bh_held())); } EXPORT_SYMBOL_GPL(task_cls_state); diff --git a/net/core/sock.c b/net/core/sock.c index 08f16db46070..193901d09757 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1497,7 +1497,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); /* SANITY */ - get_net(sock_net(newsk)); + if (likely(newsk->sk_net_refcnt)) + get_net(sock_net(newsk)); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -1967,20 +1968,21 @@ static void __release_sock(struct sock *sk) * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on * @timeo: for how long + * @skb: last skb seen on sk_receive_queue * * Now socket state including sk->sk_err is changed only under lock, * hence we may omit checks after joining wait queue. * We check receive queue before schedule() only as optimization; * it is very likely that release_sock() added new data. */ -int sk_wait_data(struct sock *sk, long *timeo) +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { int rc; DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); finish_wait(sk_sleep(sk), &wait); return rc; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 52a94016526d..b5cf13a28009 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -886,7 +886,7 @@ verify_sock_status: break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (len > skb->len) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index f46e4d1306f2..214d44aef35b 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -207,7 +207,7 @@ found: } else { fq->q.meat += skb->len; } - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { @@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, clone->data_len = clone->len; head->data_len -= clone->len; head->len -= clone->len; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } WARN_ON(head == NULL); @@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 933a92820d26..6c8b1fbafce8 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1017,14 +1017,16 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { - read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); - r->arp_flags = arp_state_to_flags(neigh); - read_unlock_bh(&neigh->lock); - r->arp_ha.sa_family = dev->type; - strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + if (!(neigh->nud_state & NUD_NOARP)) { + read_lock_bh(&neigh->lock); + memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + r->arp_flags = arp_state_to_flags(neigh); + read_unlock_bh(&neigh->lock); + r->arp_ha.sa_family = dev->type; + strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + err = 0; + } neigh_release(neigh); - err = 0; } return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e813196c91c7..2d9cb1748f81 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -882,7 +882,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); - blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } return 0; } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c6211ed60b03..9c02920725db 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -13,6 +13,7 @@ struct fib_alias { u8 fa_state; u8 fa_slen; u32 tb_id; + s16 fa_default; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c7358ea4ae93..3a06586b170c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1202,23 +1202,40 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) } /* Must be invoked inside of an RCU protected region. */ -void fib_select_default(struct fib_result *res) +void fib_select_default(const struct flowi4 *flp, struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; struct hlist_head *fa_head = res->fa_head; struct fib_table *tb = res->table; + u8 slen = 32 - res->prefixlen; int order = -1, last_idx = -1; - struct fib_alias *fa; + struct fib_alias *fa, *fa1 = NULL; + u32 last_prio = res->fi->fib_priority; + u8 last_tos = 0; hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; + if (fa->fa_slen != slen) + continue; + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fa->tb_id != tb->tb_id) + continue; + if (next_fi->fib_priority > last_prio && + fa->fa_tos == last_tos) { + if (last_tos) + continue; + break; + } + if (next_fi->fib_flags & RTNH_F_DEAD) + continue; + last_tos = fa->fa_tos; + last_prio = next_fi->fib_priority; + if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; @@ -1228,10 +1245,11 @@ void fib_select_default(struct fib_result *res) if (!fi) { if (next_fi != res->fi) break; + fa1 = fa; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { + &last_idx, fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } fi = next_fi; @@ -1239,20 +1257,21 @@ void fib_select_default(struct fib_result *res) } if (order <= 0 || !fi) { - tb->tb_default = -1; + if (fa1) + fa1->fa_default = -1; goto out; } if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { + fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } if (last_idx >= 0) fib_result_assign(res, last_resort); - tb->tb_default = last_idx; + fa1->fa_default = last_idx; out: return; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 15d32612e3c6..37c4bb89a708 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1171,6 +1171,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; err = switchdev_fib_ipv4_add(key, plen, fi, new_fa->fa_tos, @@ -1222,6 +1223,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = 0; new_fa->fa_slen = slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; /* (Optionally) offload fib entry to switch hardware. */ err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, @@ -1791,8 +1793,6 @@ void fib_table_flush_external(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } } @@ -1862,8 +1862,6 @@ int fib_table_flush(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } @@ -1990,7 +1988,6 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) return NULL; tb->tb_id = id; - tb->tb_default = -1; tb->tb_num_default = 0; tb->tb_data = (alias ? alias->__data : tb->__data); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5e346a082e5f..d0a7c0319e3d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) unsigned int evicted = 0; HLIST_HEAD(expired); -evict_again: spin_lock(&hb->chain_lock); hlist_for_each_entry_safe(fq, n, &hb->chain, list) { if (!inet_fragq_should_evict(fq)) continue; - if (!del_timer(&fq->timer)) { - /* q expiring right now thus increment its refcount so - * it won't be freed under us and wait until the timer - * has finished executing then destroy it - */ - atomic_inc(&fq->refcnt); - spin_unlock(&hb->chain_lock); - del_timer_sync(&fq->timer); - inet_frag_put(fq, f); - goto evict_again; - } + if (!del_timer(&fq->timer)) + continue; - fq->flags |= INET_FRAG_EVICTED; - hlist_del(&fq->list); - hlist_add_head(&fq->list, &expired); + hlist_add_head(&fq->list_evictor, &expired); ++evicted; } spin_unlock(&hb->chain_lock); - hlist_for_each_entry_safe(fq, n, &expired, list) + hlist_for_each_entry_safe(fq, n, &expired, list_evictor) f->frag_expire((unsigned long) fq); return evicted; @@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) int i; nf->low_thresh = 0; - local_bh_disable(); evict_again: + local_bh_disable(); seq = read_seqbegin(&f->rnd_seqlock); for (i = 0; i < INETFRAGS_HASHSZ ; i++) inet_evict_bucket(f, &f->hash[i]); - if (read_seqretry(&f->rnd_seqlock, seq)) - goto evict_again; - local_bh_enable(); + cond_resched(); + + if (read_seqretry(&f->rnd_seqlock, seq) || + percpu_counter_sum(&nf->mem)) + goto evict_again; percpu_counter_destroy(&nf->mem); } @@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - if (!(fq->flags & INET_FRAG_EVICTED)) - hlist_del(&fq->list); + hlist_del(&fq->list); + fq->flags |= INET_FRAG_COMPLETE; spin_unlock(&hb->chain_lock); } @@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) if (!(fq->flags & INET_FRAG_COMPLETE)) { fq_unlink(fq, f); atomic_dec(&fq->refcnt); - fq->flags |= INET_FRAG_COMPLETE; } } EXPORT_SYMBOL(inet_frag_kill); @@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) fp = xp; } sum = sum_truesize + f->qsize; - sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); kmem_cache_free(f->frags_cachep, q); + + sub_frag_mem_limit(nf, sum); } EXPORT_SYMBOL(inet_frag_destroy); @@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - add_frag_mem_limit(q, f->qsize); + add_frag_mem_limit(nf, f->qsize); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 31f71b15cfba..921138f6c97c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if (!(qp->q.flags & INET_FRAG_EVICTED)) { + if (!inet_frag_evicting(&qp->q)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp) kfree_skb(fp); fp = xp; } while (fp); - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; qp->q.len = 0; @@ -455,7 +455,7 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - sub_frag_mem_limit(&qp->q, free_it->truesize); + sub_frag_mem_limit(qp->q.net, free_it->truesize); kfree_skb(free_it); } } @@ -479,7 +479,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - add_frag_mem_limit(&qp->q, skb->truesize); + add_frag_mem_limit(qp->q.net, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; @@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&qp->q, clone->truesize); + add_frag_mem_limit(qp->q.net, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d0362a2de3d3..e681b852ced1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2176,7 +2176,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!res.prefixlen && res.table->tb_num_default > 1 && res.type == RTN_UNICAST && !fl4->flowi4_oif) - fib_select_default(&res); + fib_select_default(fl4, &res); if (!fl4->saddr) fl4->saddr = FIB_RES_PREFSRC(net, res); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7f4056785acc..45534a5ab430 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -780,7 +780,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -1575,7 +1575,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; - struct sk_buff *skb; + struct sk_buff *skb, *last; u32 urg_hole = 0; if (unlikely(flags & MSG_ERRQUEUE)) @@ -1635,7 +1635,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Next get a buffer. */ + last = skb_peek_tail(&sk->sk_receive_queue); skb_queue_walk(&sk->sk_receive_queue, skb) { + last = skb; /* Now that we have two receive queues this * shouldn't happen. */ @@ -1754,8 +1756,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); - } else - sk_wait_data(sk, &timeo); + } else { + sk_wait_data(sk, &timeo, last); + } if (user_recv) { int chunk; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0a05b35a90fc..c53331cfed95 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1650,6 +1650,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; @@ -1664,6 +1665,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, ndisc_send_unsol_na(dev); in6_dev_put(idev); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&nd_tbl, dev); + break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8a1b..6d02498172c1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ found: fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&fq->q, head->truesize); + sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; head->next = NULL; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8cce77..f1159bb76e0a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - if (fq->q.flags & INET_FRAG_EVICTED) + if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); @@ -330,7 +330,7 @@ found: fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8fd9febaa5ba..8dab4e569571 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo) if (signal_pending(current)) break; rc = 0; - if (sk_wait_data(sk, &timeo)) + if (sk_wait_data(sk, &timeo, NULL)) break; } return rc; @@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, release_sock(sk); lock_sock(sk); } else - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n", diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5d2b806a862e..38fbc194b9cb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 285eae3a1454..24c554201a76 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { ip_vs_start_estimator(svc->net, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; - if (sched->add_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->add_dest) sched->add_dest(svc, dest); } else { - if (sched->upd_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->upd_dest) sched->upd_dest(svc, dest); } } @@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, struct ip_vs_scheduler *sched; sched = rcu_dereference_protected(svc->scheduler, 1); - if (sched->del_dest) + if (sched && sched->del_dest) sched->del_dest(svc, dest); } } @@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ip_vs_use_count_inc(); /* Lookup the scheduler by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - ret = -ENOENT; - goto out_err; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + ret = -ENOENT; + goto out_err; + } } if (u->pe_name && *u->pe_name) { @@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) - goto out_err; - sched = NULL; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) + goto out_err; + sched = NULL; + } /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); @@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, static int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { - struct ip_vs_scheduler *sched, *old_sched; + struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; /* * Lookup the scheduler, by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - return -ENOENT; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + return -ENOENT; + } } old_sched = sched; @@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = rcu_dereference_protected(svc->scheduler, 1); if (sched != old_sched) { + if (old_sched) { + ip_vs_unbind_scheduler(svc, old_sched); + RCU_INIT_POINTER(svc->scheduler, NULL); + /* Wait all svc->sched_data users */ + synchronize_rcu(); + } /* Bind the new scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) { - old_sched = sched; - goto out; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + ip_vs_scheduler_put(sched); + goto out; + } } - /* Unbind the old scheduler on success */ - ip_vs_unbind_scheduler(svc, old_sched); } /* @@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - sched->name); + sched_name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - sched->name, + sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, sched->name, + svc->fwmark, sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; struct ip_vs_kstats kstats; + char *sched_name; sched = rcu_dereference_protected(src->scheduler, 1); + sched_name = sched ? sched->name : "none"; dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; struct ip_vs_kstats kstats; + char *sched_name; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } sched = rcu_dereference_protected(svc->scheduler, 1); + sched_name = sched ? sched->name : "none"; pe = rcu_dereference_protected(svc->pe, 1); - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 199760c71f39..7e8141647943 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc, if (sched->done_service) sched->done_service(svc); - /* svc->scheduler can not be set to NULL */ + /* svc->scheduler can be set to NULL only by caller */ } @@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { - struct ip_vs_scheduler *sched; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; - sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - sched->name, svc->fwmark, svc->fwmark, msg); + sched_name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b08ba9538d12..d99ad93eb855 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp->control, pkts); + ip_vs_sync_conn(net, cp, pkts); } } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index bf66a8657a5f..258a0b0e82a2 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; @@ -505,6 +504,13 @@ err_put: return -1; err_unreach: + /* The ip6_link_failure function requires the dev field to be set + * in order to get the net (further for the sake of fwmark + * reflection). + */ + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + dst_link_failure(skb); return -1; } @@ -523,10 +529,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); } return ret; } +/* In the event of a remote destination, it's possible that we would have + * matches against an old socket (particularly a TIME-WAIT socket). This + * causes havoc down the line (ip_local_out et. al. expect regular sockets + * and invalid memory accesses will happen) so simply drop the association + * in this case. +*/ +static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) +{ + /* If dev is set, the packet came from the LOCAL_IN callback and + * not from a local TCP socket. + */ + if (skb->dev) + skb_orphan(skb); +} + /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, struct ip_vs_conn *cp, int local) @@ -538,12 +561,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 1); + + /* Remove the early_demux association unless it's bound for the + * exact same port and address on this host after translation. + */ + if (!local || cp->vport != cp->dport || + !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) + ip_vs_drop_early_demux_sk(skb); + if (!local) { skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; + return ret; } @@ -557,7 +591,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) ip_vs_notrack(skb); if (!local) { + ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else @@ -845,6 +882,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, struct ipv6hdr *old_ipv6h = NULL; #endif + ip_vs_drop_early_demux_sk(skb); + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 13fad8668f83..651039ad1681 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) spin_unlock(&pcpu->lock); } +/* Released via destroy_conntrack() */ +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) +{ + struct nf_conn *tmpl; + + tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL); + if (tmpl == NULL) + return NULL; + + tmpl->status = IPS_TEMPLATE; + write_pnet(&tmpl->ct_net, net); + +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif + atomic_set(&tmpl->ct_general.use, 0); + + return tmpl; +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kfree(tmpl); + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); + +static void nf_ct_tmpl_free(struct nf_conn *tmpl) +{ + nf_ct_ext_destroy(tmpl); + nf_ct_ext_free(tmpl); + kfree(tmpl); +} + static void destroy_conntrack(struct nf_conntrack *nfct) { @@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); + if (unlikely(nf_ct_is_template(ct))) { + nf_ct_tmpl_free(ct); + return; + } rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto && l4proto->destroy) @@ -540,28 +584,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); -/* deletion from this larval template list happens via nf_ct_put() */ -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) -{ - struct ct_pcpu *pcpu; - - __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); - __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); - nf_conntrack_get(&tmpl->ct_general); - - /* add this conntrack to the (per cpu) tmpl list */ - local_bh_disable(); - tmpl->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu); - - spin_lock(&pcpu->lock); - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->tmpl); - spin_unlock_bh(&pcpu->lock); -} -EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); - /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net) spin_lock_init(&pcpu->lock); INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL); } net->ct.stat = alloc_percpu(struct ip_conntrack_stat); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 7a17070c5dab..b45a4223cb05 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } static inline int expect_matches(const struct nf_conntrack_expect *a, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d1c23940a86a..6b8b0abbfab4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone, } err = nf_ct_expect_related_report(exp, portid, report); - if (err < 0) - goto err_exp; - - return 0; -err_exp: nf_ct_expect_put(exp); err_ct: nf_ct_put(ct); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 789feeae6c44..71f1e9fdfa18 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net) static int __net_init synproxy_net_init(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); - struct nf_conntrack_tuple t; struct nf_conn *ct; int err = -ENOMEM; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); if (IS_ERR(ct)) { err = PTR_ERR(ct); goto err1; @@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net) if (!nfct_synproxy_ext_add(ct)) goto err2; - nf_conntrack_tmpl_insert(net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 75747aecdebe..c6630030c912 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -184,7 +184,6 @@ out: static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { - struct nf_conntrack_tuple t; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err1; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); ret = PTR_ERR(ct); if (IS_ERR(ct)) goto err2; @@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err3; } - - nf_conntrack_tmpl_insert(par->net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); out: info->ct = ct; return 0; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f407ebc13481..29d2c31f406c 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -126,6 +126,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) goto out; } + sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { ret = -ENOMEM; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9e8741226c6..ed458b315ef4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2403,7 +2403,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); - if (tp_len > dev->mtu + dev->hard_header_len) { + if (likely(tp_len >= 0) && + tp_len > dev->mtu + dev->hard_header_len) { struct ethhdr *ehdr; /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual @@ -2784,7 +2785,7 @@ static int packet_release(struct socket *sock) static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) { struct packet_sock *po = pkt_sk(sk); - const struct net_device *dev_curr; + struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; @@ -2808,15 +2809,13 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) po->num = proto; po->prot_hook.type = proto; - - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; packet_cached_dev_assign(po, dev); } + if (dev_curr) + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index af427a3dbcba..43ec92680ae8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tc_action *a, int bind) +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { struct tcf_common *p = a->priv; int ret = 0; @@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind) if (p) { if (bind) p->tcfc_bindcnt--; - else if (p->tcfc_bindcnt > 0) + else if (strict && p->tcfc_bindcnt > 0) return -EPERM; p->tcfc_refcnt--; @@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind) ret = 1; } } + return ret; } -EXPORT_SYMBOL(tcf_hash_release); +EXPORT_SYMBOL(__tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a) @@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { a->priv = p; - ret = tcf_hash_release(a, 0); + ret = __tcf_hash_release(a, false, true); if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; @@ -408,7 +409,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = tcf_hash_release(a, bind); + ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1df78289e248..d0edeb7a1950 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -27,9 +27,10 @@ struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; - char *bpf_name; + const char *bpf_name; u32 bpf_fd; u16 bpf_num_ops; + bool is_ebpf; }; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, @@ -207,6 +208,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; + cfg->is_ebpf = false; return 0; } @@ -241,18 +243,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_fd = bpf_fd; cfg->bpf_name = name; cfg->filter = fp; + cfg->is_ebpf = true; return 0; } +static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) +{ + if (cfg->is_ebpf) + bpf_prog_put(cfg->filter); + else + bpf_prog_destroy(cfg->filter); + + kfree(cfg->bpf_ops); + kfree(cfg->bpf_name); +} + +static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, + struct tcf_bpf_cfg *cfg) +{ + cfg->is_ebpf = tcf_bpf_is_ebpf(prog); + cfg->filter = prog->filter; + + cfg->bpf_ops = prog->bpf_ops; + cfg->bpf_name = prog->bpf_name; +} + static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *act, int replace, int bind) { struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; - struct tcf_bpf_cfg cfg; bool is_bpf, is_ebpf; int ret; @@ -301,6 +325,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog = to_bpf(act); spin_lock_bh(&prog->tcf_lock); + if (ret != ACT_P_CREATED) + tcf_bpf_prog_fill_cfg(prog, &old); + prog->bpf_ops = cfg.bpf_ops; prog->bpf_name = cfg.bpf_name; @@ -316,32 +343,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (ret == ACT_P_CREATED) tcf_hash_insert(act); + else + tcf_bpf_cfg_cleanup(&old); return ret; destroy_fp: - if (is_ebpf) - bpf_prog_put(cfg.filter); - else - bpf_prog_destroy(cfg.filter); - - kfree(cfg.bpf_ops); - kfree(cfg.bpf_name); - + tcf_bpf_cfg_cleanup(&cfg); return ret; } static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - const struct tcf_bpf *prog = act->priv; - - if (tcf_bpf_is_ebpf(prog)) - bpf_prog_put(prog->filter); - else - bpf_prog_destroy(prog->filter); + struct tcf_bpf_cfg tmp; - kfree(prog->bpf_ops); - kfree(prog->bpf_name); + tcf_bpf_prog_fill_cfg(act->priv, &tmp); + tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 17e6d6669c7f..ff8b466a73f6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -68,13 +68,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; } else { - p = to_pedit(a); - tcf_hash_release(a, bind); if (bind) return 0; + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; - + p = to_pedit(a); if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 93d5742dc7e0..6a783afe4960 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -385,6 +385,19 @@ static void choke_reset(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); + while (q->head != q->tail) { + struct sk_buff *skb = q->tab[q->head]; + + q->head = (q->head + 1) & q->tab_mask; + if (!skb) + continue; + qdisc_qstats_backlog_dec(sch, skb); + --sch->q.qlen; + qdisc_drop(skb, sch); + } + + memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); + q->head = q->tail = 0; red_restart(&q->vars); } diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 89f8fcf73f18..ade9445a55ab 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -216,6 +216,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = { .peek = qdisc_peek_head, .init = plug_init, .change = plug_change, + .reset = qdisc_reset_queue, .owner = THIS_MODULE, }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1425ec2bbd5a..17bef01b9aa3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2200,12 +2200,6 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; - if (sctp_sk(sk)->subscribe.sctp_data_io_event) - pr_warn_ratelimited(DEPRECATED "%s (pid %d) " - "Requested SCTP_SNDRCVINFO event.\n" - "Use SCTP_RCVINFO through SCTP_RECVRCVINFO option instead.\n", - current->comm, task_pid_nr(current)); - /* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, * if there is no data to be sent or retransmit, the stack will * immediately send up this notification. |