diff options
89 files changed, 3921 insertions, 2025 deletions
diff --git a/Documentation/devicetree/bindings/net/microchip,lan78xx.txt b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt new file mode 100644 index 000000000000..76786a0f6d3d --- /dev/null +++ b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt @@ -0,0 +1,54 @@ +Microchip LAN78xx Gigabit Ethernet controller + +The LAN78XX devices are usually configured by programming their OTP or with +an external EEPROM, but some platforms (e.g. Raspberry Pi 3 B+) have neither. +The Device Tree properties, if present, override the OTP and EEPROM. + +Required properties: +- compatible: Should be one of "usb424,7800", "usb424,7801" or "usb424,7850". + +Optional properties: +- local-mac-address: see ethernet.txt +- mac-address: see ethernet.txt + +Optional properties of the embedded PHY: +- microchip,led-modes: a 0..4 element vector, with each element configuring + the operating mode of an LED. Omitted LEDs are turned off. Allowed values + are defined in "include/dt-bindings/net/microchip-lan78xx.h". + +Example: + +/* Based on the configuration for a Raspberry Pi 3 B+ */ +&usb { + usb-port@1 { + compatible = "usb424,2514"; + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + usb-port@1 { + compatible = "usb424,2514"; + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + ethernet: ethernet@1 { + compatible = "usb424,7800"; + reg = <1>; + local-mac-address = [ 00 11 22 33 44 55 ]; + + mdio { + #address-cells = <0x1>; + #size-cells = <0x0>; + eth_phy: ethernet-phy@1 { + reg = <1>; + microchip,led-modes = < + LAN78XX_LINK_1000_ACTIVITY + LAN78XX_LINK_10_100_ACTIVITY + >; + }; + }; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt index 96398cc2982f..fc8f01718690 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt @@ -13,13 +13,25 @@ Required properties: - reg: Address where registers are mapped and size of region. - interrupts: Should contain the MAC interrupt. - phy-mode: See ethernet.txt in the same directory. Allow to choose - "rgmii", "rmii", or "mii" according to the PHY. + "rgmii", "rmii", "mii", or "internal" according to the PHY. + The acceptable mode is SoC-dependent. - phy-handle: Should point to the external phy device. See ethernet.txt file in the same directory. - clocks: A phandle to the clock for the MAC. + For Pro4 SoC, that is "socionext,uniphier-pro4-ave4", + another MAC clock, GIO bus clock and PHY clock are also required. + - clock-names: Should contain + - "ether", "ether-gb", "gio", "ether-phy" for Pro4 SoC + - "ether" for others + - resets: A phandle to the reset control for the MAC. For Pro4 SoC, + GIO bus reset is also required. + - reset-names: Should contain + - "ether", "gio" for Pro4 SoC + - "ether" for others + - socionext,syscon-phy-mode: A phandle to syscon with one argument + that configures phy mode. The argument is the ID of MAC instance. Optional properties: - - resets: A phandle to the reset control for the MAC. - local-mac-address: See ethernet.txt in the same directory. Required subnode: @@ -34,8 +46,11 @@ Example: interrupts = <0 66 4>; phy-mode = "rgmii"; phy-handle = <ðphy>; + clock-names = "ether"; clocks = <&sys_clk 6>; + reset-names = "ether"; resets = <&sys_rst 6>; + socionext,syscon-phy-mode = <&soc_glue 0>; local-mac-address = [00 00 00 00 00 00]; mdio { diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt index 77cd42cc5f54..b025770eeb92 100644 --- a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt +++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt @@ -17,7 +17,8 @@ pool management. Required properties: -- compatible : Must be "ti,keystone-navigator-qmss"; +- compatible : Must be "ti,keystone-navigator-qmss". + : Must be "ti,66ak2g-navss-qm" for QMSS on K2G SoC. - clocks : phandle to the reference clock for this device. - queue-range : <start number> total range of queue numbers for the device. - linkram0 : <address size> for internal link ram, where size is the total @@ -39,6 +40,12 @@ Required properties: - Descriptor memory setup region. - Queue Management/Queue Proxy region for queue Push. - Queue Management/Queue Proxy region for queue Pop. + +For QMSS on K2G SoC, following QM reg indexes are used in that order + - Queue Peek region. + - Queue configuration region. + - Queue Management/Queue Proxy region for queue Push/Pop. + - queue-pools : child node classifying the queue ranges into pools. Queue ranges are grouped into 3 type of pools: - qpend : pool of qpend(interruptible) queues diff --git a/MAINTAINERS b/MAINTAINERS index b60179d948bb..81465707d8a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5321,7 +5321,6 @@ F: include/linux/*mdio*.h F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h -F: include/linux/platform_data/mdio-gpio.h F: include/linux/platform_data/mdio-bcm-unimac.h F: include/trace/events/mdio.h F: include/uapi/linux/mdio.h @@ -14572,7 +14571,9 @@ M: Woojung Huh <woojung.huh@microchip.com> M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> L: netdev@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/net/microchip,lan78xx.txt F: drivers/net/usb/lan78xx.* +F: include/dt-bindings/net/microchip-lan78xx.h USB MASS STORAGE DRIVER M: Alan Stern <stern@rowland.harvard.edu> diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index 9fee7c83ef9f..f2f0264c58ba 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -29,8 +29,8 @@ config PCMCIA_AXNET called axnet_cs. If unsure, say N. config AX88796 - tristate "ASIX AX88796 NE2000 clone support" - depends on (ARM || MIPS || SUPERH) + tristate "ASIX AX88796 NE2000 clone support" if !ZORRO + depends on (ARM || MIPS || SUPERH || ZORRO || COMPILE_TEST) select CRC32 select PHYLIB select MDIO_BITBANG @@ -45,6 +45,19 @@ config AX88796_93CX6 ---help--- Select this if your platform comes with an external 93CX6 eeprom. +config XSURF100 + tristate "Amiga XSurf 100 AX88796/NE2000 clone support" + depends on ZORRO + select AX88796 + select ASIX_PHY + help + This driver is for the Individual Computers X-Surf 100 Ethernet + card (based on the Asix AX88796 chip). If you have such a card, + say Y. Otherwise, say N. + + To compile this driver as a module, choose M here: the module + will be called xsurf100. + config HYDRA tristate "Hydra support" depends on ZORRO diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile index 1d650e66cc6e..85c83c566ec6 100644 --- a/drivers/net/ethernet/8390/Makefile +++ b/drivers/net/ethernet/8390/Makefile @@ -16,4 +16,5 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o obj-$(CONFIG_STNIC) += stnic.o 8390.o obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o obj-$(CONFIG_WD80x3) += wd.o 8390.o +obj-$(CONFIG_XSURF100) += xsurf100.o obj-$(CONFIG_ZORRO8390) += zorro8390.o diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index da61cf3cb3a9..2a0ddec1dd56 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -163,6 +163,21 @@ static void ax_reset_8390(struct net_device *dev) ei_outb(ENISR_RESET, addr + EN0_ISR); /* Ack intr. */ } +/* Wrapper for __ei_interrupt for platforms that have a platform-specific + * way to find out whether the interrupt request might be caused by + * the ax88796 chip. + */ +static irqreturn_t ax_ei_interrupt_filtered(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct ax_device *ax = to_ax_dev(dev); + struct platform_device *pdev = to_platform_device(dev->dev.parent); + + if (!ax->plat->check_irq(pdev)) + return IRQ_NONE; + + return ax_ei_interrupt(irq, dev_id); +} static void ax_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page) @@ -387,6 +402,90 @@ static void ax_phy_switch(struct net_device *dev, int on) ei_outb(reg_gpoc, ei_local->mem + EI_SHIFT(0x17)); } +static void ax_bb_mdc(struct mdiobb_ctrl *ctrl, int level) +{ + struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); + + if (level) + ax->reg_memr |= AX_MEMR_MDC; + else + ax->reg_memr &= ~AX_MEMR_MDC; + + ei_outb(ax->reg_memr, ax->addr_memr); +} + +static void ax_bb_dir(struct mdiobb_ctrl *ctrl, int output) +{ + struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); + + if (output) + ax->reg_memr &= ~AX_MEMR_MDIR; + else + ax->reg_memr |= AX_MEMR_MDIR; + + ei_outb(ax->reg_memr, ax->addr_memr); +} + +static void ax_bb_set_data(struct mdiobb_ctrl *ctrl, int value) +{ + struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); + + if (value) + ax->reg_memr |= AX_MEMR_MDO; + else + ax->reg_memr &= ~AX_MEMR_MDO; + + ei_outb(ax->reg_memr, ax->addr_memr); +} + +static int ax_bb_get_data(struct mdiobb_ctrl *ctrl) +{ + struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); + int reg_memr = ei_inb(ax->addr_memr); + + return reg_memr & AX_MEMR_MDI ? 1 : 0; +} + +static const struct mdiobb_ops bb_ops = { + .owner = THIS_MODULE, + .set_mdc = ax_bb_mdc, + .set_mdio_dir = ax_bb_dir, + .set_mdio_data = ax_bb_set_data, + .get_mdio_data = ax_bb_get_data, +}; + +static int ax_mii_init(struct net_device *dev) +{ + struct platform_device *pdev = to_platform_device(dev->dev.parent); + struct ei_device *ei_local = netdev_priv(dev); + struct ax_device *ax = to_ax_dev(dev); + int err; + + ax->bb_ctrl.ops = &bb_ops; + ax->addr_memr = ei_local->mem + AX_MEMR; + ax->mii_bus = alloc_mdio_bitbang(&ax->bb_ctrl); + if (!ax->mii_bus) { + err = -ENOMEM; + goto out; + } + + ax->mii_bus->name = "ax88796_mii_bus"; + ax->mii_bus->parent = dev->dev.parent; + snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); + + err = mdiobus_register(ax->mii_bus); + if (err) + goto out_free_mdio_bitbang; + + return 0; + + out_free_mdio_bitbang: + free_mdio_bitbang(ax->mii_bus); + out: + return err; +} + static int ax_open(struct net_device *dev) { struct ax_device *ax = to_ax_dev(dev); @@ -394,8 +493,16 @@ static int ax_open(struct net_device *dev) netdev_dbg(dev, "open\n"); - ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags, - dev->name, dev); + ret = ax_mii_init(dev); + if (ret) + goto failed_mii; + + if (ax->plat->check_irq) + ret = request_irq(dev->irq, ax_ei_interrupt_filtered, + ax->irqflags, dev->name, dev); + else + ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags, + dev->name, dev); if (ret) goto failed_request_irq; @@ -421,6 +528,10 @@ static int ax_open(struct net_device *dev) ax_phy_switch(dev, 0); free_irq(dev->irq, dev); failed_request_irq: + /* unregister mdiobus */ + mdiobus_unregister(ax->mii_bus); + free_mdio_bitbang(ax->mii_bus); + failed_mii: return ret; } @@ -440,6 +551,9 @@ static int ax_close(struct net_device *dev) phy_disconnect(dev->phydev); free_irq(dev->irq, dev); + + mdiobus_unregister(ax->mii_bus); + free_mdio_bitbang(ax->mii_bus); return 0; } @@ -539,92 +653,8 @@ static const struct net_device_ops ax_netdev_ops = { #endif }; -static void ax_bb_mdc(struct mdiobb_ctrl *ctrl, int level) -{ - struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); - - if (level) - ax->reg_memr |= AX_MEMR_MDC; - else - ax->reg_memr &= ~AX_MEMR_MDC; - - ei_outb(ax->reg_memr, ax->addr_memr); -} - -static void ax_bb_dir(struct mdiobb_ctrl *ctrl, int output) -{ - struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); - - if (output) - ax->reg_memr &= ~AX_MEMR_MDIR; - else - ax->reg_memr |= AX_MEMR_MDIR; - - ei_outb(ax->reg_memr, ax->addr_memr); -} - -static void ax_bb_set_data(struct mdiobb_ctrl *ctrl, int value) -{ - struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); - - if (value) - ax->reg_memr |= AX_MEMR_MDO; - else - ax->reg_memr &= ~AX_MEMR_MDO; - - ei_outb(ax->reg_memr, ax->addr_memr); -} - -static int ax_bb_get_data(struct mdiobb_ctrl *ctrl) -{ - struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); - int reg_memr = ei_inb(ax->addr_memr); - - return reg_memr & AX_MEMR_MDI ? 1 : 0; -} - -static const struct mdiobb_ops bb_ops = { - .owner = THIS_MODULE, - .set_mdc = ax_bb_mdc, - .set_mdio_dir = ax_bb_dir, - .set_mdio_data = ax_bb_set_data, - .get_mdio_data = ax_bb_get_data, -}; - /* setup code */ -static int ax_mii_init(struct net_device *dev) -{ - struct platform_device *pdev = to_platform_device(dev->dev.parent); - struct ei_device *ei_local = netdev_priv(dev); - struct ax_device *ax = to_ax_dev(dev); - int err; - - ax->bb_ctrl.ops = &bb_ops; - ax->addr_memr = ei_local->mem + AX_MEMR; - ax->mii_bus = alloc_mdio_bitbang(&ax->bb_ctrl); - if (!ax->mii_bus) { - err = -ENOMEM; - goto out; - } - - ax->mii_bus->name = "ax88796_mii_bus"; - ax->mii_bus->parent = dev->dev.parent; - snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", - pdev->name, pdev->id); - - err = mdiobus_register(ax->mii_bus); - if (err) - goto out_free_mdio_bitbang; - - return 0; - - out_free_mdio_bitbang: - free_mdio_bitbang(ax->mii_bus); - out: - return err; -} - static void ax_initial_setup(struct net_device *dev, struct ei_device *ei_local) { void __iomem *ioaddr = ei_local->mem; @@ -669,10 +699,16 @@ static int ax_init_dev(struct net_device *dev) if (ax->plat->flags & AXFLG_HAS_EEPROM) { unsigned char SA_prom[32]; + ei_outb(6, ioaddr + EN0_RCNTLO); + ei_outb(0, ioaddr + EN0_RCNTHI); + ei_outb(0, ioaddr + EN0_RSARLO); + ei_outb(0, ioaddr + EN0_RSARHI); + ei_outb(E8390_RREAD + E8390_START, ioaddr + NE_CMD); for (i = 0; i < sizeof(SA_prom); i += 2) { SA_prom[i] = ei_inb(ioaddr + NE_DATAPORT); SA_prom[i + 1] = ei_inb(ioaddr + NE_DATAPORT); } + ei_outb(ENISR_RDC, ioaddr + EN0_ISR); /* Ack intr. */ if (ax->plat->wordlength == 2) for (i = 0; i < 16; i++) @@ -741,18 +777,20 @@ static int ax_init_dev(struct net_device *dev) #endif ei_local->reset_8390 = &ax_reset_8390; - ei_local->block_input = &ax_block_input; - ei_local->block_output = &ax_block_output; + if (ax->plat->block_input) + ei_local->block_input = ax->plat->block_input; + else + ei_local->block_input = &ax_block_input; + if (ax->plat->block_output) + ei_local->block_output = ax->plat->block_output; + else + ei_local->block_output = &ax_block_output; ei_local->get_8390_hdr = &ax_get_8390_hdr; ei_local->priv = 0; dev->netdev_ops = &ax_netdev_ops; dev->ethtool_ops = &ax_ethtool_ops; - ret = ax_mii_init(dev); - if (ret) - goto err_out; - ax_NS8390_init(dev, 0); ret = register_netdev(dev); @@ -777,7 +815,6 @@ static int ax_remove(struct platform_device *pdev) struct resource *mem; unregister_netdev(dev); - free_irq(dev->irq, dev); iounmap(ei_local->mem); mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -789,6 +826,7 @@ static int ax_remove(struct platform_device *pdev) release_mem_region(mem->start, resource_size(mem)); } + platform_set_drvdata(pdev, NULL); free_netdev(dev); return 0; @@ -835,6 +873,9 @@ static int ax_probe(struct platform_device *pdev) dev->irq = irq->start; ax->irqflags = irq->flags & IRQF_TRIGGER_MASK; + if (irq->flags & IORESOURCE_IRQ_SHAREABLE) + ax->irqflags |= IRQF_SHARED; + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!mem) { dev_err(&pdev->dev, "no MEM specified\n"); @@ -919,6 +960,7 @@ static int ax_probe(struct platform_device *pdev) release_mem_region(mem->start, mem_size); exit_mem: + platform_set_drvdata(pdev, NULL); free_netdev(dev); return ret; diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c new file mode 100644 index 000000000000..e2c963821ffe --- /dev/null +++ b/drivers/net/ethernet/8390/xsurf100.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/zorro.h> +#include <net/ax88796.h> +#include <asm/amigaints.h> + +#define ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100 \ + ZORRO_ID(INDIVIDUAL_COMPUTERS, 0x64, 0) + +#define XS100_IRQSTATUS_BASE 0x40 +#define XS100_8390_BASE 0x800 + +/* Longword-access area. Translated to 2 16-bit access cycles by the + * X-Surf 100 FPGA + */ +#define XS100_8390_DATA32_BASE 0x8000 +#define XS100_8390_DATA32_SIZE 0x2000 +/* Sub-Areas for fast data register access; addresses relative to area begin */ +#define XS100_8390_DATA_READ32_BASE 0x0880 +#define XS100_8390_DATA_WRITE32_BASE 0x0C80 +#define XS100_8390_DATA_AREA_SIZE 0x80 + +#define __NS8390_init ax_NS8390_init + +/* force unsigned long back to 'void __iomem *' */ +#define ax_convert_addr(_a) ((void __force __iomem *)(_a)) + +#define ei_inb(_a) z_readb(ax_convert_addr(_a)) +#define ei_outb(_v, _a) z_writeb(_v, ax_convert_addr(_a)) + +#define ei_inw(_a) z_readw(ax_convert_addr(_a)) +#define ei_outw(_v, _a) z_writew(_v, ax_convert_addr(_a)) + +#define ei_inb_p(_a) ei_inb(_a) +#define ei_outb_p(_v, _a) ei_outb(_v, _a) + +/* define EI_SHIFT() to take into account our register offsets */ +#define EI_SHIFT(x) (ei_local->reg_offset[(x)]) + +/* Ensure we have our RCR base value */ +#define AX88796_PLATFORM + +static unsigned char version[] = + "ax88796.c: Copyright 2005,2007 Simtec Electronics\n"; + +#include "lib8390.c" + +/* from ne.c */ +#define NE_CMD EI_SHIFT(0x00) +#define NE_RESET EI_SHIFT(0x1f) +#define NE_DATAPORT EI_SHIFT(0x10) + +struct xsurf100_ax_plat_data { + struct ax_plat_data ax; + void __iomem *base_regs; + void __iomem *data_area; +}; + +static int is_xsurf100_network_irq(struct platform_device *pdev) +{ + struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev); + + return (readw(xs100->base_regs + XS100_IRQSTATUS_BASE) & 0xaaaa) != 0; +} + +/* These functions guarantee that the iomem is accessed with 32 bit + * cycles only. z_memcpy_fromio / z_memcpy_toio don't + */ +static void z_memcpy_fromio32(void *dst, const void __iomem *src, size_t bytes) +{ + while (bytes > 32) { + asm __volatile__ + ("movem.l (%0)+,%%d0-%%d7\n" + "movem.l %%d0-%%d7,(%1)\n" + "adda.l #32,%1" : "=a"(src), "=a"(dst) + : "0"(src), "1"(dst) : "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "memory"); + bytes -= 32; + } + while (bytes) { + *(uint32_t *)dst = z_readl(src); + src += 4; + dst += 4; + bytes -= 4; + } +} + +static void z_memcpy_toio32(void __iomem *dst, const void *src, size_t bytes) +{ + while (bytes) { + z_writel(*(const uint32_t *)src, dst); + src += 4; + dst += 4; + bytes -= 4; + } +} + +static void xs100_write(struct net_device *dev, const void *src, + unsigned int count) +{ + struct ei_device *ei_local = netdev_priv(dev); + struct platform_device *pdev = to_platform_device(dev->dev.parent); + struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev); + + /* copy whole blocks */ + while (count > XS100_8390_DATA_AREA_SIZE) { + z_memcpy_toio32(xs100->data_area + + XS100_8390_DATA_WRITE32_BASE, src, + XS100_8390_DATA_AREA_SIZE); + src += XS100_8390_DATA_AREA_SIZE; + count -= XS100_8390_DATA_AREA_SIZE; + } + /* copy whole dwords */ + z_memcpy_toio32(xs100->data_area + XS100_8390_DATA_WRITE32_BASE, + src, count & ~3); + src += count & ~3; + if (count & 2) { + ei_outw(*(uint16_t *)src, ei_local->mem + NE_DATAPORT); + src += 2; + } + if (count & 1) + ei_outb(*(uint8_t *)src, ei_local->mem + NE_DATAPORT); +} + +static void xs100_read(struct net_device *dev, void *dst, unsigned int count) +{ + struct ei_device *ei_local = netdev_priv(dev); + struct platform_device *pdev = to_platform_device(dev->dev.parent); + struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev); + + /* copy whole blocks */ + while (count > XS100_8390_DATA_AREA_SIZE) { + z_memcpy_fromio32(dst, xs100->data_area + + XS100_8390_DATA_READ32_BASE, + XS100_8390_DATA_AREA_SIZE); + dst += XS100_8390_DATA_AREA_SIZE; + count -= XS100_8390_DATA_AREA_SIZE; + } + /* copy whole dwords */ + z_memcpy_fromio32(dst, xs100->data_area + XS100_8390_DATA_READ32_BASE, + count & ~3); + dst += count & ~3; + if (count & 2) { + *(uint16_t *)dst = ei_inw(ei_local->mem + NE_DATAPORT); + dst += 2; + } + if (count & 1) + *(uint8_t *)dst = ei_inb(ei_local->mem + NE_DATAPORT); +} + +/* Block input and output, similar to the Crynwr packet driver. If + * you are porting to a new ethercard, look at the packet driver + * source for hints. The NEx000 doesn't share the on-board packet + * memory -- you have to put the packet out through the "remote DMA" + * dataport using ei_outb. + */ +static void xs100_block_input(struct net_device *dev, int count, + struct sk_buff *skb, int ring_offset) +{ + struct ei_device *ei_local = netdev_priv(dev); + void __iomem *nic_base = ei_local->mem; + char *buf = skb->data; + + if (ei_local->dmaing) { + netdev_err(dev, + "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n", + __func__, + ei_local->dmaing, ei_local->irqlock); + return; + } + + ei_local->dmaing |= 0x01; + + ei_outb(E8390_NODMA + E8390_PAGE0 + E8390_START, nic_base + NE_CMD); + ei_outb(count & 0xff, nic_base + EN0_RCNTLO); + ei_outb(count >> 8, nic_base + EN0_RCNTHI); + ei_outb(ring_offset & 0xff, nic_base + EN0_RSARLO); + ei_outb(ring_offset >> 8, nic_base + EN0_RSARHI); + ei_outb(E8390_RREAD + E8390_START, nic_base + NE_CMD); + + xs100_read(dev, buf, count); + + ei_local->dmaing &= ~1; +} + +static void xs100_block_output(struct net_device *dev, int count, + const unsigned char *buf, const int start_page) +{ + struct ei_device *ei_local = netdev_priv(dev); + void __iomem *nic_base = ei_local->mem; + unsigned long dma_start; + + /* Round the count up for word writes. Do we need to do this? + * What effect will an odd byte count have on the 8390? I + * should check someday. + */ + if (ei_local->word16 && (count & 0x01)) + count++; + + /* This *shouldn't* happen. If it does, it's the last thing + * you'll see + */ + if (ei_local->dmaing) { + netdev_err(dev, + "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n", + __func__, + ei_local->dmaing, ei_local->irqlock); + return; + } + + ei_local->dmaing |= 0x01; + /* We should already be in page 0, but to be safe... */ + ei_outb(E8390_PAGE0 + E8390_START + E8390_NODMA, nic_base + NE_CMD); + + ei_outb(ENISR_RDC, nic_base + EN0_ISR); + + /* Now the normal output. */ + ei_outb(count & 0xff, nic_base + EN0_RCNTLO); + ei_outb(count >> 8, nic_base + EN0_RCNTHI); + ei_outb(0x00, nic_base + EN0_RSARLO); + ei_outb(start_page, nic_base + EN0_RSARHI); + + ei_outb(E8390_RWRITE + E8390_START, nic_base + NE_CMD); + + xs100_write(dev, buf, count); + + dma_start = jiffies; + + while ((ei_inb(nic_base + EN0_ISR) & ENISR_RDC) == 0) { + if (jiffies - dma_start > 2 * HZ / 100) { /* 20ms */ + netdev_warn(dev, "timeout waiting for Tx RDC.\n"); + ei_local->reset_8390(dev); + ax_NS8390_init(dev, 1); + break; + } + } + + ei_outb(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */ + ei_local->dmaing &= ~0x01; +} + +static int xsurf100_probe(struct zorro_dev *zdev, + const struct zorro_device_id *ent) +{ + struct platform_device *pdev; + struct xsurf100_ax_plat_data ax88796_data; + struct resource res[2] = { + DEFINE_RES_NAMED(IRQ_AMIGA_PORTS, 1, NULL, + IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE), + DEFINE_RES_MEM(zdev->resource.start + XS100_8390_BASE, + 4 * 0x20) + }; + int reg; + /* This table is referenced in the device structure, so it must + * outlive the scope of xsurf100_probe. + */ + static u32 reg_offsets[32]; + int ret = 0; + + /* X-Surf 100 control and 32 bit ring buffer data access areas. + * These resources are not used by the ax88796 driver, so must + * be requested here and passed via platform data. + */ + + if (!request_mem_region(zdev->resource.start, 0x100, zdev->name)) { + dev_err(&zdev->dev, "cannot reserve X-Surf 100 control registers\n"); + return -ENXIO; + } + + if (!request_mem_region(zdev->resource.start + + XS100_8390_DATA32_BASE, + XS100_8390_DATA32_SIZE, + "X-Surf 100 32-bit data access")) { + dev_err(&zdev->dev, "cannot reserve 32-bit area\n"); + ret = -ENXIO; + goto exit_req; + } + + for (reg = 0; reg < 0x20; reg++) + reg_offsets[reg] = 4 * reg; + + memset(&ax88796_data, 0, sizeof(ax88796_data)); + ax88796_data.ax.flags = AXFLG_HAS_EEPROM; + ax88796_data.ax.wordlength = 2; + ax88796_data.ax.dcr_val = 0x48; + ax88796_data.ax.rcr_val = 0x40; + ax88796_data.ax.reg_offsets = reg_offsets; + ax88796_data.ax.check_irq = is_xsurf100_network_irq; + ax88796_data.base_regs = ioremap(zdev->resource.start, 0x100); + + /* error handling for ioremap regs */ + if (!ax88796_data.base_regs) { + dev_err(&zdev->dev, "Cannot ioremap area %pR (registers)\n", + &zdev->resource); + + ret = -ENXIO; + goto exit_req2; + } + + ax88796_data.data_area = ioremap(zdev->resource.start + + XS100_8390_DATA32_BASE, XS100_8390_DATA32_SIZE); + + /* error handling for ioremap data */ + if (!ax88796_data.data_area) { + dev_err(&zdev->dev, + "Cannot ioremap area %pR offset %x (32-bit access)\n", + &zdev->resource, XS100_8390_DATA32_BASE); + + ret = -ENXIO; + goto exit_mem; + } + + ax88796_data.ax.block_output = xs100_block_output; + ax88796_data.ax.block_input = xs100_block_input; + + pdev = platform_device_register_resndata(&zdev->dev, "ax88796", + zdev->slotaddr, res, 2, + &ax88796_data, + sizeof(ax88796_data)); + + if (IS_ERR(pdev)) { + dev_err(&zdev->dev, "cannot register platform device\n"); + ret = -ENXIO; + goto exit_mem2; + } + + zorro_set_drvdata(zdev, pdev); + + if (!ret) + return 0; + + exit_mem2: + iounmap(ax88796_data.data_area); + + exit_mem: + iounmap(ax88796_data.base_regs); + + exit_req2: + release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE, + XS100_8390_DATA32_SIZE); + + exit_req: + release_mem_region(zdev->resource.start, 0x100); + + return ret; +} + +static void xsurf100_remove(struct zorro_dev *zdev) +{ + struct platform_device *pdev = zorro_get_drvdata(zdev); + struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev); + + platform_device_unregister(pdev); + + iounmap(xs100->base_regs); + release_mem_region(zdev->resource.start, 0x100); + iounmap(xs100->data_area); + release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE, + XS100_8390_DATA32_SIZE); +} + +static const struct zorro_device_id xsurf100_zorro_tbl[] = { + { ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100, }, + { 0 } +}; + +MODULE_DEVICE_TABLE(zorro, xsurf100_zorro_tbl); + +static struct zorro_driver xsurf100_driver = { + .name = "xsurf100", + .id_table = xsurf100_zorro_tbl, + .probe = xsurf100_probe, + .remove = xsurf100_remove, +}; + +module_driver(xsurf100_driver, zorro_register_driver, zorro_unregister_driver); + +MODULE_DESCRIPTION("X-Surf 100 driver"); +MODULE_AUTHOR("Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index e8b290473ee2..bc9861c90ea3 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -1497,3 +1497,57 @@ void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, octeon_mbox_write(oct, &mbox_cmd); } } + +static void +cn23xx_get_vf_stats_callback(struct octeon_device *oct, + struct octeon_mbox_cmd *cmd, void *arg) +{ + struct oct_vf_stats_ctx *ctx = arg; + + memcpy(ctx->stats, cmd->data, sizeof(struct oct_vf_stats)); + atomic_set(&ctx->status, 1); +} + +int cn23xx_get_vf_stats(struct octeon_device *oct, int vfidx, + struct oct_vf_stats *stats) +{ + u32 timeout = HZ; // 1sec + struct octeon_mbox_cmd mbox_cmd; + struct oct_vf_stats_ctx ctx; + u32 count = 0, ret; + + if (!(oct->sriov_info.vf_drv_loaded_mask & (1ULL << vfidx))) + return -1; + + if (sizeof(struct oct_vf_stats) > sizeof(mbox_cmd.data)) + return -1; + + mbox_cmd.msg.u64 = 0; + mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST; + mbox_cmd.msg.s.resp_needed = 1; + mbox_cmd.msg.s.cmd = OCTEON_GET_VF_STATS; + mbox_cmd.msg.s.len = 1; + mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf; + mbox_cmd.recv_len = 0; + mbox_cmd.recv_status = 0; + mbox_cmd.fn = (octeon_mbox_callback_t)cn23xx_get_vf_stats_callback; + ctx.stats = stats; + atomic_set(&ctx.status, 0); + mbox_cmd.fn_arg = (void *)&ctx; + memset(mbox_cmd.data, 0, sizeof(mbox_cmd.data)); + octeon_mbox_write(oct, &mbox_cmd); + + do { + schedule_timeout_uninterruptible(1); + } while ((atomic_read(&ctx.status) == 0) && (count++ < timeout)); + + ret = atomic_read(&ctx.status); + if (ret == 0) { + octeon_mbox_cancel(oct, 0); + dev_err(&oct->pci_dev->dev, "Unable to get stats from VF-%d, timedout\n", + vfidx); + return -1; + } + + return 0; +} diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h index 2aba5247b6d8..63b3de4f2bfe 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -43,6 +43,15 @@ struct octeon_cn23xx_pf { #define CN23XX_SLI_DEF_BP 0x40 +struct oct_vf_stats { + u64 rx_packets; + u64 tx_packets; + u64 rx_bytes; + u64 tx_bytes; + u64 broadcast; + u64 multicast; +}; + int setup_cn23xx_octeon_pf_device(struct octeon_device *oct); int validate_cn23xx_pf_config_info(struct octeon_device *oct, @@ -56,4 +65,7 @@ int cn23xx_fw_loaded(struct octeon_device *oct); void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, u8 *mac); + +int cn23xx_get_vf_stats(struct octeon_device *oct, int ifidx, + struct oct_vf_stats *stats); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 44c2a0c1bbdd..f3891ae11b02 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3326,6 +3326,31 @@ static const struct switchdev_ops lio_pf_switchdev_ops = { .switchdev_port_attr_get = lio_pf_switchdev_attr_get, }; +static int liquidio_get_vf_stats(struct net_device *netdev, int vfidx, + struct ifla_vf_stats *vf_stats) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct oct_vf_stats stats; + int ret; + + if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced) + return -EINVAL; + + memset(&stats, 0, sizeof(struct oct_vf_stats)); + ret = cn23xx_get_vf_stats(oct, vfidx, &stats); + if (!ret) { + vf_stats->rx_packets = stats.rx_packets; + vf_stats->tx_packets = stats.tx_packets; + vf_stats->rx_bytes = stats.rx_bytes; + vf_stats->tx_bytes = stats.tx_bytes; + vf_stats->broadcast = stats.broadcast; + vf_stats->multicast = stats.multicast; + } + + return ret; +} + static const struct net_device_ops lionetdevops = { .ndo_open = liquidio_open, .ndo_stop = liquidio_stop, @@ -3348,6 +3373,7 @@ static const struct net_device_ops lionetdevops = { .ndo_get_vf_config = liquidio_get_vf_config, .ndo_set_vf_trust = liquidio_set_vf_trust, .ndo_set_vf_link_state = liquidio_set_vf_link_state, + .ndo_get_vf_stats = liquidio_get_vf_stats, }; /** \brief Entry point for the liquidio module diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c index 28e74ee23ff8..021d99cd1665 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c @@ -24,6 +24,7 @@ #include "octeon_device.h" #include "octeon_main.h" #include "octeon_mailbox.h" +#include "cn23xx_pf_device.h" /** * octeon_mbox_read: @@ -205,6 +206,26 @@ int octeon_mbox_write(struct octeon_device *oct, return ret; } +static void get_vf_stats(struct octeon_device *oct, + struct oct_vf_stats *stats) +{ + int i; + + for (i = 0; i < oct->num_iqs; i++) { + if (!oct->instr_queue[i]) + continue; + stats->tx_packets += oct->instr_queue[i]->stats.tx_done; + stats->tx_bytes += oct->instr_queue[i]->stats.tx_tot_bytes; + } + + for (i = 0; i < oct->num_oqs; i++) { + if (!oct->droq[i]) + continue; + stats->rx_packets += oct->droq[i]->stats.rx_pkts_received; + stats->rx_bytes += oct->droq[i]->stats.rx_bytes_received; + } +} + /** * octeon_mbox_process_cmd: * @mbox: Pointer mailbox @@ -250,6 +271,15 @@ static int octeon_mbox_process_cmd(struct octeon_mbox *mbox, mbox_cmd->msg.s.params); break; + case OCTEON_GET_VF_STATS: + dev_dbg(&oct->pci_dev->dev, "Got VF stats request. Sending data back\n"); + mbox_cmd->msg.s.type = OCTEON_MBOX_RESPONSE; + mbox_cmd->msg.s.resp_needed = 1; + mbox_cmd->msg.s.len = 1 + + sizeof(struct oct_vf_stats) / sizeof(u64); + get_vf_stats(oct, (struct oct_vf_stats *)mbox_cmd->data); + octeon_mbox_write(oct, mbox_cmd); + break; default: break; } @@ -322,3 +352,25 @@ int octeon_mbox_process_message(struct octeon_mbox *mbox) return 0; } + +int octeon_mbox_cancel(struct octeon_device *oct, int q_no) +{ + struct octeon_mbox *mbox = oct->mbox[q_no]; + struct octeon_mbox_cmd *mbox_cmd; + unsigned long flags = 0; + + spin_lock_irqsave(&mbox->lock, flags); + mbox_cmd = &mbox->mbox_resp; + + if (!(mbox->state & OCTEON_MBOX_STATE_RESPONSE_PENDING)) { + spin_unlock_irqrestore(&mbox->lock, flags); + return 1; + } + + mbox->state = OCTEON_MBOX_STATE_IDLE; + memset(mbox_cmd, 0, sizeof(*mbox_cmd)); + writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg); + spin_unlock_irqrestore(&mbox->lock, flags); + + return 0; +} diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h index 1def22afeff1..d92bd7e16477 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h @@ -25,6 +25,7 @@ #define OCTEON_VF_ACTIVE 0x1 #define OCTEON_VF_FLR_REQUEST 0x2 #define OCTEON_PF_CHANGED_VF_MACADDR 0x4 +#define OCTEON_GET_VF_STATS 0x8 /*Macro for Read acknowldgement*/ #define OCTEON_PFVFACK 0xffffffffffffffffULL @@ -107,9 +108,15 @@ struct octeon_mbox { }; +struct oct_vf_stats_ctx { + atomic_t status; + struct oct_vf_stats *stats; +}; + int octeon_mbox_read(struct octeon_mbox *mbox); int octeon_mbox_write(struct octeon_device *oct, struct octeon_mbox_cmd *mbox_cmd); int octeon_mbox_process_message(struct octeon_mbox *mbox); +int octeon_mbox_cancel(struct octeon_device *oct, int q_no); #endif diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig index 08db24954f7e..e4e8b24c1a5d 100644 --- a/drivers/net/ethernet/huawei/hinic/Kconfig +++ b/drivers/net/ethernet/huawei/hinic/Kconfig @@ -4,7 +4,7 @@ config HINIC tristate "Huawei Intelligent PCIE Network Interface Card" - depends on (PCI_MSI && X86) + depends on (PCI_MSI && (X86 || ARM64)) ---help--- This driver supports HiNIC PCIE Ethernet cards. To compile this driver as part of the kernel, choose Y here. diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1904c0323d39..8e4edb634b11 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -442,7 +442,7 @@ struct mlxsw_sp_fib6_entry { struct mlxsw_sp_rt6 { struct list_head list; - struct rt6_info *rt; + struct fib6_info *rt; }; struct mlxsw_sp_lpm_tree { @@ -2770,9 +2770,9 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, struct in6_addr *gw; int ifindex, weight; - ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex; - weight = mlxsw_sp_rt6->rt->rt6i_nh_weight; - gw = &mlxsw_sp_rt6->rt->rt6i_gateway; + ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex; + weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight; + gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw; if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex, weight)) return false; @@ -2838,7 +2838,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = mlxsw_sp_rt6->rt->dst.dev; + dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev; val ^= dev->ifindex; } @@ -3834,11 +3834,11 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, for (i = 0; i < nh_grp->count; i++) { struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->dst.dev && + if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->rt6i_gateway)) + &rt->fib6_nh.nh_gw)) return nh; continue; } @@ -3895,7 +3895,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; return; } @@ -3905,9 +3905,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); if (nh && nh->offloaded) - mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; else - mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -3920,9 +3920,9 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; - rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4699,29 +4699,29 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } -static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt) +static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) { /* Packets with link-local destination IP arriving to the router * are trapped to the CPU, so no need to program specific routes * for them. */ - if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL) + if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL) return true; /* Multicast routes aren't supported, so ignore them. Neighbour * Discovery packets are specifically trapped. */ - if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) + if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST) return true; /* Cloned routes are irrelevant in the forwarding path. */ - if (rt->rt6i_flags & RTF_CACHE) + if (rt->fib6_flags & RTF_CACHE) return true; return false; } -static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) +static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -4734,18 +4734,18 @@ static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) * memory. */ mlxsw_sp_rt6->rt = rt; - rt6_hold(rt); + fib6_info_hold(rt); return mlxsw_sp_rt6; } #if IS_ENABLED(CONFIG_IPV6) -static void mlxsw_sp_rt6_release(struct rt6_info *rt) +static void mlxsw_sp_rt6_release(struct fib6_info *rt) { - rt6_release(rt); + fib6_info_release(rt); } #else -static void mlxsw_sp_rt6_release(struct rt6_info *rt) +static void mlxsw_sp_rt6_release(struct fib6_info *rt) { } #endif @@ -4756,13 +4756,13 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) kfree(mlxsw_sp_rt6); } -static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt) +static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) { /* RTF_CACHE routes are ignored */ - return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; + return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; } -static struct rt6_info * +static struct fib6_info * mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) { return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, @@ -4771,7 +4771,7 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct rt6_info *nrt, bool replace) + const struct fib6_info *nrt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; @@ -4779,21 +4779,21 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, return NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same * virtual router. */ - if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) continue; - if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) break; - if (rt->rt6i_metric < nrt->rt6i_metric) + if (rt->fib6_metric < nrt->fib6_metric) continue; - if (rt->rt6i_metric == nrt->rt6i_metric && + if (rt->fib6_metric == nrt->fib6_metric && mlxsw_sp_fib6_rt_can_mp(rt)) return fib6_entry; - if (rt->rt6i_metric > nrt->rt6i_metric) + if (rt->fib6_metric > nrt->fib6_metric) break; } @@ -4802,7 +4802,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, static struct mlxsw_sp_rt6 * mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, - const struct rt6_info *rt) + const struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -4815,21 +4815,21 @@ mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, } static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt, + const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->dst.dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret); + return rt->fib6_nh.nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret); } static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, - const struct rt6_info *rt) + const struct fib6_info *rt) { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->dst.dev; + struct net_device *dev = rt->fib6_nh.nh_dev; struct mlxsw_sp_rif *rif; int err; @@ -4870,13 +4870,13 @@ static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, - const struct rt6_info *rt) + const struct fib6_info *rt) { - struct net_device *dev = rt->dst.dev; + struct net_device *dev = rt->fib6_nh.nh_dev; nh->nh_grp = nh_grp; - nh->nh_weight = rt->rt6i_nh_weight; - memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + nh->nh_weight = rt->fib6_nh.nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -4897,9 +4897,9 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt) + const struct fib6_info *rt) { - return rt->rt6i_flags & RTF_GATEWAY || + return rt->fib6_flags & RTF_GATEWAY || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } @@ -4928,7 +4928,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); nh_grp->count = fib6_entry->nrt6; for (i = 0; i < nh_grp->count; i++) { - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; nh = &nh_grp->nexthops[i]; err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); @@ -5040,7 +5040,7 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; int err; @@ -5068,7 +5068,7 @@ err_nexthop6_group_update: static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -5084,7 +5084,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, - const struct rt6_info *rt) + const struct fib6_info *rt) { /* Packets hitting RTF_REJECT routes need to be discarded by the * stack. We can rely on their destination device not having a @@ -5092,9 +5092,9 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, * local, which will cause them to be trapped with a lower * priority than packets that need to be locally received. */ - if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; - else if (rt->rt6i_flags & RTF_REJECT) + else if (rt->fib6_flags & RTF_REJECT) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; @@ -5118,7 +5118,7 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; @@ -5168,25 +5168,25 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct rt6_info *nrt, bool replace) + const struct fib6_info *nrt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) continue; - if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) break; - if (replace && rt->rt6i_metric == nrt->rt6i_metric) { + if (replace && rt->fib6_metric == nrt->fib6_metric) { if (mlxsw_sp_fib6_rt_can_mp(rt) == mlxsw_sp_fib6_rt_can_mp(nrt)) return fib6_entry; if (mlxsw_sp_fib6_rt_can_mp(nrt)) fallback = fallback ?: fib6_entry; } - if (rt->rt6i_metric > nrt->rt6i_metric) + if (rt->fib6_metric > nrt->fib6_metric) return fallback ?: fib6_entry; } @@ -5198,7 +5198,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, bool replace) { struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; - struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); + struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct mlxsw_sp_fib6_entry *fib6_entry; fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); @@ -5213,9 +5213,9 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, struct mlxsw_sp_fib6_entry *last; list_for_each_entry(last, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last); - if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id) + if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id) break; fib6_entry = last; } @@ -5268,29 +5268,29 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt) + const struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; - vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id); + vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id); if (!vr) return NULL; fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6); - fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr, - sizeof(rt->rt6i_dst.addr), - rt->rt6i_dst.plen); + fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen); if (!fib_node) return NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id && - rt->rt6i_metric == iter_rt->rt6i_metric && + if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id && + rt->fib6_metric == iter_rt->fib6_metric && mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) return fib6_entry; } @@ -5316,7 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct rt6_info *rt, bool replace) + struct fib6_info *rt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5325,16 +5325,16 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp->router->aborted) return 0; - if (rt->rt6i_src.plen) + if (rt->fib6_src.plen) return -EINVAL; if (mlxsw_sp_fib6_rt_should_ignore(rt)) return 0; - fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id, - &rt->rt6i_dst.addr, - sizeof(rt->rt6i_dst.addr), - rt->rt6i_dst.plen, + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, + &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, MLXSW_SP_L3_PROTO_IPV6); if (IS_ERR(fib_node)) return PTR_ERR(fib_node); @@ -5373,7 +5373,7 @@ err_fib6_entry_nexthop_add: } static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5836,7 +5836,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, fen6_info = container_of(info, struct fib6_entry_notifier_info, info); fib_work->fen6_info = *fen6_info; - rt6_hold(fib_work->fen6_info.rt); + fib6_info_hold(fib_work->fen6_info.rt); break; } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e874504e8b28..8b1b7e8ca56c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2850,6 +2850,24 @@ static int qed_fp_cqe_completion(struct qed_dev *dev, cqe); } +static int qed_req_bulletin_update_mac(struct qed_dev *cdev, u8 *mac) +{ + int i, ret; + + if (IS_PF(cdev)) + return 0; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + ret = qed_vf_pf_bulletin_update_mac(p_hwfn, mac); + if (ret) + return ret; + } + + return 0; +} + #ifdef CONFIG_QED_SRIOV extern const struct qed_iov_hv_ops qed_iov_ops_pass; #endif @@ -2887,6 +2905,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = { .ntuple_filter_config = &qed_ntuple_arfs_filter_config, .configure_arfs_searcher = &qed_configure_arfs_searcher, .get_coalesce = &qed_get_coalesce, + .req_bulletin_update_mac = &qed_req_bulletin_update_mac, }; const struct qed_eth_ops *qed_get_eth_ops(void) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 5acb91b3564c..f01bf52bc381 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -48,7 +48,7 @@ static int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn, u8 opcode, __le16 echo, union event_ring_data *data, u8 fw_return_code); - +static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid); static u8 qed_vf_calculate_legacy(struct qed_vf_info *p_vf) { @@ -1790,7 +1790,8 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, if (!p_vf->vport_instance) return -EINVAL; - if (events & BIT(MAC_ADDR_FORCED)) { + if ((events & BIT(MAC_ADDR_FORCED)) || + p_vf->p_vf_info.is_trusted_configured) { /* Since there's no way [currently] of removing the MAC, * we can always assume this means we need to force it. */ @@ -1809,8 +1810,12 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, "PF failed to configure MAC for VF\n"); return rc; } - - p_vf->configured_features |= 1 << MAC_ADDR_FORCED; + if (p_vf->p_vf_info.is_trusted_configured) + p_vf->configured_features |= + BIT(VFPF_BULLETIN_MAC_ADDR); + else + p_vf->configured_features |= + BIT(MAC_ADDR_FORCED); } if (events & BIT(VLAN_ADDR_FORCED)) { @@ -3170,6 +3175,10 @@ static int qed_iov_vf_update_mac_shadow(struct qed_hwfn *p_hwfn, if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)) return 0; + /* Don't keep track of shadow copy since we don't intend to restore. */ + if (p_vf->p_vf_info.is_trusted_configured) + return 0; + /* First remove entries and then add new ones */ if (p_params->opcode == QED_FILTER_REMOVE) { for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) { @@ -3244,9 +3253,17 @@ static int qed_iov_chk_ucast(struct qed_hwfn *hwfn, /* No real decision to make; Store the configured MAC */ if (params->type == QED_FILTER_MAC || - params->type == QED_FILTER_MAC_VLAN) + params->type == QED_FILTER_MAC_VLAN) { ether_addr_copy(vf->mac, params->mac); + if (vf->is_trusted_configured) { + qed_iov_bulletin_set_mac(hwfn, vf->mac, vfid); + + /* Update and post bulleitin again */ + qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG); + } + } + return 0; } @@ -3803,6 +3820,40 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn, __qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin); } +static int +qed_iov_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_vf_info *p_vf) +{ + struct qed_bulletin_content *p_bulletin = p_vf->bulletin.p_virt; + struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx; + struct vfpf_bulletin_update_mac_tlv *p_req; + u8 status = PFVF_STATUS_SUCCESS; + int rc = 0; + + if (!p_vf->p_vf_info.is_trusted_configured) { + DP_VERBOSE(p_hwfn, + QED_MSG_IOV, + "Blocking bulletin update request from untrusted VF[%d]\n", + p_vf->abs_vf_id); + status = PFVF_STATUS_NOT_SUPPORTED; + rc = -EINVAL; + goto send_status; + } + + p_req = &mbx->req_virt->bulletin_update_mac; + ether_addr_copy(p_bulletin->mac, p_req->mac); + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "Updated bulletin of VF[%d] with requested MAC[%pM]\n", + p_vf->abs_vf_id, p_req->mac); + +send_status: + qed_iov_prepare_resp(p_hwfn, p_ptt, p_vf, + CHANNEL_TLV_BULLETIN_UPDATE_MAC, + sizeof(struct pfvf_def_resp_tlv), status); + return rc; +} + static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, int vfid) { @@ -3882,6 +3933,9 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, case CHANNEL_TLV_COALESCE_READ: qed_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf); break; + case CHANNEL_TLV_BULLETIN_UPDATE_MAC: + qed_iov_vf_pf_bulletin_update_mac(p_hwfn, p_ptt, p_vf); + break; } } else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, @@ -4081,16 +4135,60 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn, return; } - feature = 1 << MAC_ADDR_FORCED; + if (vf_info->p_vf_info.is_trusted_configured) { + feature = BIT(VFPF_BULLETIN_MAC_ADDR); + /* Trust mode will disable Forced MAC */ + vf_info->bulletin.p_virt->valid_bitmap &= + ~BIT(MAC_ADDR_FORCED); + } else { + feature = BIT(MAC_ADDR_FORCED); + /* Forced MAC will disable MAC_ADDR */ + vf_info->bulletin.p_virt->valid_bitmap &= + ~BIT(VFPF_BULLETIN_MAC_ADDR); + } + memcpy(vf_info->bulletin.p_virt->mac, mac, ETH_ALEN); vf_info->bulletin.p_virt->valid_bitmap |= feature; - /* Forced MAC will disable MAC_ADDR */ - vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR); qed_iov_configure_vport_forced(p_hwfn, vf_info, feature); } +static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid) +{ + struct qed_vf_info *vf_info; + u64 feature; + + vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true); + if (!vf_info) { + DP_NOTICE(p_hwfn->cdev, "Can not set MAC, invalid vfid [%d]\n", + vfid); + return -EINVAL; + } + + if (vf_info->b_malicious) { + DP_NOTICE(p_hwfn->cdev, "Can't set MAC to malicious VF [%d]\n", + vfid); + return -EINVAL; + } + + if (vf_info->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "Can not set MAC, Forced MAC is configured\n"); + return -EINVAL; + } + + feature = BIT(VFPF_BULLETIN_MAC_ADDR); + ether_addr_copy(vf_info->bulletin.p_virt->mac, mac); + + vf_info->bulletin.p_virt->valid_bitmap |= feature; + + if (vf_info->p_vf_info.is_trusted_configured) + qed_iov_configure_vport_forced(p_hwfn, vf_info, feature); + + return 0; +} + static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, u16 pvid, int vfid) { @@ -4204,6 +4302,21 @@ out: return rc; } +static u8 *qed_iov_bulletin_get_mac(struct qed_hwfn *p_hwfn, u16 rel_vf_id) +{ + struct qed_vf_info *p_vf; + + p_vf = qed_iov_get_vf_info(p_hwfn, rel_vf_id, true); + if (!p_vf || !p_vf->bulletin.p_virt) + return NULL; + + if (!(p_vf->bulletin.p_virt->valid_bitmap & + BIT(VFPF_BULLETIN_MAC_ADDR))) + return NULL; + + return p_vf->bulletin.p_virt->mac; +} + static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn, u16 rel_vf_id) { @@ -4493,8 +4606,12 @@ static int qed_sriov_pf_set_mac(struct qed_dev *cdev, u8 *mac, int vfid) if (!vf_info) continue; - /* Set the forced MAC, and schedule the IOV task */ - ether_addr_copy(vf_info->forced_mac, mac); + /* Set the MAC, and schedule the IOV task */ + if (vf_info->is_trusted_configured) + ether_addr_copy(vf_info->mac, mac); + else + ether_addr_copy(vf_info->forced_mac, mac); + qed_schedule_iov(hwfn, QED_IOV_WQ_SET_UNICAST_FILTER_FLAG); } @@ -4802,6 +4919,33 @@ static void qed_handle_vf_msg(struct qed_hwfn *hwfn) qed_ptt_release(hwfn, ptt); } +static bool qed_pf_validate_req_vf_mac(struct qed_hwfn *hwfn, + u8 *mac, + struct qed_public_vf_info *info) +{ + if (info->is_trusted_configured) { + if (is_valid_ether_addr(info->mac) && + (!mac || !ether_addr_equal(mac, info->mac))) + return true; + } else { + if (is_valid_ether_addr(info->forced_mac) && + (!mac || !ether_addr_equal(mac, info->forced_mac))) + return true; + } + + return false; +} + +static void qed_set_bulletin_mac(struct qed_hwfn *hwfn, + struct qed_public_vf_info *info, + int vfid) +{ + if (info->is_trusted_configured) + qed_iov_bulletin_set_mac(hwfn, info->mac, vfid); + else + qed_iov_bulletin_set_forced_mac(hwfn, info->forced_mac, vfid); +} + static void qed_handle_pf_set_vf_unicast(struct qed_hwfn *hwfn) { int i; @@ -4816,18 +4960,20 @@ static void qed_handle_pf_set_vf_unicast(struct qed_hwfn *hwfn) continue; /* Update data on bulletin board */ - mac = qed_iov_bulletin_get_forced_mac(hwfn, i); - if (is_valid_ether_addr(info->forced_mac) && - (!mac || !ether_addr_equal(mac, info->forced_mac))) { + if (info->is_trusted_configured) + mac = qed_iov_bulletin_get_mac(hwfn, i); + else + mac = qed_iov_bulletin_get_forced_mac(hwfn, i); + + if (qed_pf_validate_req_vf_mac(hwfn, mac, info)) { DP_VERBOSE(hwfn, QED_MSG_IOV, "Handling PF setting of VF MAC to VF 0x%02x [Abs 0x%02x]\n", i, hwfn->cdev->p_iov_info->first_vf_in_pf + i); - /* Update bulletin board with forced MAC */ - qed_iov_bulletin_set_forced_mac(hwfn, - info->forced_mac, i); + /* Update bulletin board with MAC */ + qed_set_bulletin_mac(hwfn, info, i); update = true; } @@ -4867,6 +5013,72 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn) qed_ptt_release(hwfn, ptt); } +static void qed_update_mac_for_vf_trust_change(struct qed_hwfn *hwfn, int vf_id) +{ + struct qed_public_vf_info *vf_info; + struct qed_vf_info *vf; + u8 *force_mac; + int i; + + vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true); + vf = qed_iov_get_vf_info(hwfn, vf_id, true); + + if (!vf_info || !vf) + return; + + /* Force MAC converted to generic MAC in case of VF trust on */ + if (vf_info->is_trusted_configured && + (vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))) { + force_mac = qed_iov_bulletin_get_forced_mac(hwfn, vf_id); + + if (force_mac) { + /* Clear existing shadow copy of MAC to have a clean + * slate. + */ + for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) { + if (ether_addr_equal(vf->shadow_config.macs[i], + vf_info->mac)) { + memset(vf->shadow_config.macs[i], 0, + ETH_ALEN); + DP_VERBOSE(hwfn, QED_MSG_IOV, + "Shadow MAC %pM removed for VF 0x%02x, VF trust mode is ON\n", + vf_info->mac, vf_id); + break; + } + } + + ether_addr_copy(vf_info->mac, force_mac); + memset(vf_info->forced_mac, 0, ETH_ALEN); + vf->bulletin.p_virt->valid_bitmap &= + ~BIT(MAC_ADDR_FORCED); + qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG); + } + } + + /* Update shadow copy with VF MAC when trust mode is turned off */ + if (!vf_info->is_trusted_configured) { + u8 empty_mac[ETH_ALEN]; + + memset(empty_mac, 0, ETH_ALEN); + for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) { + if (ether_addr_equal(vf->shadow_config.macs[i], + empty_mac)) { + ether_addr_copy(vf->shadow_config.macs[i], + vf_info->mac); + DP_VERBOSE(hwfn, QED_MSG_IOV, + "Shadow is updated with %pM for VF 0x%02x, VF trust mode is OFF\n", + vf_info->mac, vf_id); + break; + } + } + /* Clear bulletin when trust mode is turned off, + * to have a clean slate for next (normal) operations. + */ + qed_iov_bulletin_set_mac(hwfn, empty_mac, vf_id); + qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG); + } +} + static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) { struct qed_sp_vport_update_params params; @@ -4890,6 +5102,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) continue; vf_info->is_trusted_configured = vf_info->is_trusted_request; + /* Handle forced MAC mode */ + qed_update_mac_for_vf_trust_change(hwfn, i); + /* Validate that the VF has a configured vport */ vf = qed_iov_get_vf_info(hwfn, i, true); if (!vf->vport_instance) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 91b5e9f02a62..2d7fcd6a0777 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1375,6 +1375,35 @@ exit: } int +qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, + u8 *p_mac) +{ + struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info; + struct vfpf_bulletin_update_mac_tlv *p_req; + struct pfvf_def_resp_tlv *p_resp; + int rc; + + if (!p_mac) + return -EINVAL; + + /* clear mailbox and prep header tlv */ + p_req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_BULLETIN_UPDATE_MAC, + sizeof(*p_req)); + ether_addr_copy(p_req->mac, p_mac); + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "Requesting bulletin update for MAC[%pM]\n", p_mac); + + /* add list termination tlv */ + qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); + + p_resp = &p_iov->pf2vf_reply->default_resp; + rc = qed_send_msg2pf(p_hwfn, &p_resp->hdr.status, sizeof(*p_resp)); + qed_vf_pf_req_end(p_hwfn, rc); + return rc; +} + +int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn, u16 rx_coal, u16 tx_coal, struct qed_queue_cid *p_cid) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index 97d44dfb38ca..4f05d5eb3cf5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -518,6 +518,12 @@ struct pfvf_read_coal_resp_tlv { u8 padding[6]; }; +struct vfpf_bulletin_update_mac_tlv { + struct vfpf_first_tlv first_tlv; + u8 mac[ETH_ALEN]; + u8 padding[2]; +}; + union vfpf_tlvs { struct vfpf_first_tlv first_tlv; struct vfpf_acquire_tlv acquire; @@ -532,6 +538,7 @@ union vfpf_tlvs { struct vfpf_update_tunn_param_tlv tunn_param_update; struct vfpf_update_coalesce update_coalesce; struct vfpf_read_coal_req_tlv read_coal_req; + struct vfpf_bulletin_update_mac_tlv bulletin_update_mac; struct tlv_buffer_size tlv_buf_size; }; @@ -650,6 +657,7 @@ enum { CHANNEL_TLV_COALESCE_UPDATE, CHANNEL_TLV_QID, CHANNEL_TLV_COALESCE_READ, + CHANNEL_TLV_BULLETIN_UPDATE_MAC, CHANNEL_TLV_MAX, /* Required for iterating over vport-update tlvs. @@ -1042,6 +1050,13 @@ int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn, struct qed_tunnel_info *p_tunn); u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id); +/** + * @brief - Ask PF to update the MAC address in it's bulletin board + * + * @param p_mac - mac address to be updated in bulletin board + */ +int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, u8 *p_mac); + #else static inline void qed_vf_get_link_params(struct qed_hwfn *p_hwfn, struct qed_mcp_link_params *params) @@ -1228,6 +1243,12 @@ static inline int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn, return -EINVAL; } +static inline int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, + u8 *p_mac) +{ + return -EINVAL; +} + static inline u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 6687e04d1558..43569b1839be 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -550,8 +550,7 @@ void qede_force_mac(void *dev, u8 *mac, bool forced) __qede_lock(edev); - /* MAC hints take effect only if we haven't set one already */ - if (is_valid_ether_addr(edev->ndev->dev_addr) && !forced) { + if (!is_valid_ether_addr(mac)) { __qede_unlock(edev); return; } @@ -1161,6 +1160,10 @@ int qede_set_mac_addr(struct net_device *ndev, void *p) if (edev->state != QEDE_STATE_OPEN) { DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "The device is currently down\n"); + /* Ask PF to explicitly update a copy in bulletin board */ + if (IS_VF(edev) && edev->ops->req_bulletin_update_mac) + edev->ops->req_bulletin_update_mac(edev->cdev, + ndev->dev_addr); goto out; } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 7d4e68907055..fcd42d0387b9 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -88,7 +88,7 @@ static const int multicast_filter_limit = 32; #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ #define R8169_REGS_SIZE 256 -#define R8169_NAPI_WEIGHT 64 +#define R8169_RX_BUF_SIZE (SZ_16K - 1) #define NUM_TX_DESC 64 /* Number of Tx descriptor registers */ #define NUM_RX_DESC 256U /* Number of Rx descriptor registers */ #define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) @@ -171,12 +171,11 @@ enum rtl_tx_desc_version { #define JUMBO_7K (7*1024 - ETH_HLEN - 2) #define JUMBO_9K (9*1024 - ETH_HLEN - 2) -#define _R(NAME,TD,FW,SZ,B) { \ +#define _R(NAME,TD,FW,SZ) { \ .name = NAME, \ .txd_version = TD, \ .fw_name = FW, \ .jumbo_max = SZ, \ - .jumbo_tx_csum = B \ } static const struct { @@ -184,135 +183,111 @@ static const struct { enum rtl_tx_desc_version txd_version; const char *fw_name; u16 jumbo_max; - bool jumbo_tx_csum; } rtl_chip_infos[] = { /* PCI devices. */ [RTL_GIGA_MAC_VER_01] = - _R("RTL8169", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_02] = - _R("RTL8169s", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169s", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_03] = - _R("RTL8110s", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8110s", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_04] = - _R("RTL8169sb/8110sb", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169sb/8110sb", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_05] = - _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_06] = - _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K), /* PCI-E devices. */ [RTL_GIGA_MAC_VER_07] = - _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true), + _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_08] = - _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true), + _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_09] = - _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true), + _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_10] = - _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_11] = - _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K), [RTL_GIGA_MAC_VER_12] = - _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K), [RTL_GIGA_MAC_VER_13] = - _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_14] = - _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_15] = - _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_16] = - _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_17] = - _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K), [RTL_GIGA_MAC_VER_18] = - _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_19] = - _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_20] = - _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_21] = - _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_22] = - _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_23] = - _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_24] = - _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_25] = - _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_1, - JUMBO_9K, false), + _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_1, JUMBO_9K), [RTL_GIGA_MAC_VER_26] = - _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_2, - JUMBO_9K, false), + _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_2, JUMBO_9K), [RTL_GIGA_MAC_VER_27] = - _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K, false), + _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_28] = - _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K, false), + _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_29] = - _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, - JUMBO_1K, true), + _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, JUMBO_1K), [RTL_GIGA_MAC_VER_30] = - _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, - JUMBO_1K, true), + _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, JUMBO_1K), [RTL_GIGA_MAC_VER_31] = - _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K, false), + _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_32] = - _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_1, - JUMBO_9K, false), + _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_1, JUMBO_9K), [RTL_GIGA_MAC_VER_33] = - _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_2, - JUMBO_9K, false), + _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_2, JUMBO_9K), [RTL_GIGA_MAC_VER_34] = - _R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3, - JUMBO_9K, false), + _R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3, JUMBO_9K), [RTL_GIGA_MAC_VER_35] = - _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_1, - JUMBO_9K, false), + _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_1, JUMBO_9K), [RTL_GIGA_MAC_VER_36] = - _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_2, - JUMBO_9K, false), + _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_2, JUMBO_9K), [RTL_GIGA_MAC_VER_37] = - _R("RTL8402", RTL_TD_1, FIRMWARE_8402_1, - JUMBO_1K, true), + _R("RTL8402", RTL_TD_1, FIRMWARE_8402_1, JUMBO_1K), [RTL_GIGA_MAC_VER_38] = - _R("RTL8411", RTL_TD_1, FIRMWARE_8411_1, - JUMBO_9K, false), + _R("RTL8411", RTL_TD_1, FIRMWARE_8411_1, JUMBO_9K), [RTL_GIGA_MAC_VER_39] = - _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_1, - JUMBO_1K, true), + _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_1, JUMBO_1K), [RTL_GIGA_MAC_VER_40] = - _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_2, - JUMBO_9K, false), + _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_2, JUMBO_9K), [RTL_GIGA_MAC_VER_41] = - _R("RTL8168g/8111g", RTL_TD_1, NULL, JUMBO_9K, false), + _R("RTL8168g/8111g", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_42] = - _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_3, - JUMBO_9K, false), + _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_3, JUMBO_9K), [RTL_GIGA_MAC_VER_43] = - _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_2, - JUMBO_1K, true), + _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_2, JUMBO_1K), [RTL_GIGA_MAC_VER_44] = - _R("RTL8411", RTL_TD_1, FIRMWARE_8411_2, - JUMBO_9K, false), + _R("RTL8411", RTL_TD_1, FIRMWARE_8411_2, JUMBO_9K), [RTL_GIGA_MAC_VER_45] = - _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_1, - JUMBO_9K, false), + _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_1, JUMBO_9K), [RTL_GIGA_MAC_VER_46] = - _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_2, - JUMBO_9K, false), + _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_2, JUMBO_9K), [RTL_GIGA_MAC_VER_47] = - _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_1, - JUMBO_1K, false), + _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_1, JUMBO_1K), [RTL_GIGA_MAC_VER_48] = - _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_2, - JUMBO_1K, false), + _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_2, JUMBO_1K), [RTL_GIGA_MAC_VER_49] = - _R("RTL8168ep/8111ep", RTL_TD_1, NULL, - JUMBO_9K, false), + _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_50] = - _R("RTL8168ep/8111ep", RTL_TD_1, NULL, - JUMBO_9K, false), + _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_51] = - _R("RTL8168ep/8111ep", RTL_TD_1, NULL, - JUMBO_9K, false), + _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K), }; #undef _R @@ -344,7 +319,6 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl); -static int rx_buf_sz = 16383; static int use_dac = -1; static struct { u32 msg_enable; @@ -777,7 +751,6 @@ struct rtl8169_private { struct net_device *dev; struct napi_struct napi; u32 msg_enable; - u16 txd_version; u16 mac_version; u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */ u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */ @@ -820,7 +793,7 @@ struct rtl8169_private { int (*get_link_ksettings)(struct net_device *, struct ethtool_link_ksettings *); void (*phy_reset_enable)(struct rtl8169_private *tp); - void (*hw_start)(struct net_device *); + void (*hw_start)(struct rtl8169_private *tp); unsigned int (*phy_reset_pending)(struct rtl8169_private *tp); unsigned int (*link_ok)(struct rtl8169_private *tp); int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd); @@ -832,14 +805,11 @@ struct rtl8169_private { struct work_struct work; } wk; - unsigned features; - struct mii_if_info mii; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; u32 saved_wolopts; - u32 opts1_mask; struct rtl_fw { const struct firmware *fw; @@ -1959,7 +1929,7 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev, features &= ~NETIF_F_ALL_TSO; if (dev->mtu > JUMBO_1K && - !rtl_chip_infos[tp->mac_version].jumbo_tx_csum) + tp->mac_version > RTL_GIGA_MAC_VER_06) features &= ~NETIF_F_IP_CSUM; return features; @@ -2154,9 +2124,8 @@ DECLARE_RTL_COND(rtl_counters_cond) return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump); } -static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) +static bool rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd) { - struct rtl8169_private *tp = netdev_priv(dev); dma_addr_t paddr = tp->counters_phys_addr; u32 cmd; @@ -2169,10 +2138,8 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); } -static bool rtl8169_reset_counters(struct net_device *dev) +static bool rtl8169_reset_counters(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - /* * Versions prior to RTL_GIGA_MAC_VER_19 don't support resetting the * tally counters. @@ -2180,13 +2147,11 @@ static bool rtl8169_reset_counters(struct net_device *dev) if (tp->mac_version < RTL_GIGA_MAC_VER_19) return true; - return rtl8169_do_counters(dev, CounterReset); + return rtl8169_do_counters(tp, CounterReset); } -static bool rtl8169_update_counters(struct net_device *dev) +static bool rtl8169_update_counters(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - /* * Some chips are unable to dump tally counters when the receiver * is disabled. @@ -2194,12 +2159,11 @@ static bool rtl8169_update_counters(struct net_device *dev) if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0) return true; - return rtl8169_do_counters(dev, CounterDump); + return rtl8169_do_counters(tp, CounterDump); } -static bool rtl8169_init_counter_offsets(struct net_device *dev) +static bool rtl8169_init_counter_offsets(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); struct rtl8169_counters *counters = tp->counters; bool ret = false; @@ -2222,10 +2186,10 @@ static bool rtl8169_init_counter_offsets(struct net_device *dev) return true; /* If both, reset and update fail, propagate to caller. */ - if (rtl8169_reset_counters(dev)) + if (rtl8169_reset_counters(tp)) ret = true; - if (rtl8169_update_counters(dev)) + if (rtl8169_update_counters(tp)) ret = true; tp->tc_offset.tx_errors = counters->tx_errors; @@ -2248,7 +2212,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev, pm_runtime_get_noresume(d); if (pm_runtime_active(d)) - rtl8169_update_counters(dev); + rtl8169_update_counters(tp); pm_runtime_put_noidle(d); @@ -2559,12 +2523,10 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, /* 8168E family. */ { 0x7c800000, 0x2c800000, RTL_GIGA_MAC_VER_34 }, - { 0x7cf00000, 0x2c200000, RTL_GIGA_MAC_VER_33 }, { 0x7cf00000, 0x2c100000, RTL_GIGA_MAC_VER_32 }, { 0x7c800000, 0x2c000000, RTL_GIGA_MAC_VER_33 }, /* 8168D family. */ - { 0x7cf00000, 0x28300000, RTL_GIGA_MAC_VER_26 }, { 0x7cf00000, 0x28100000, RTL_GIGA_MAC_VER_25 }, { 0x7c800000, 0x28000000, RTL_GIGA_MAC_VER_26 }, @@ -2574,32 +2536,24 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, { 0x7cf00000, 0x28b00000, RTL_GIGA_MAC_VER_31 }, /* 8168C family. */ - { 0x7cf00000, 0x3cb00000, RTL_GIGA_MAC_VER_24 }, { 0x7cf00000, 0x3c900000, RTL_GIGA_MAC_VER_23 }, { 0x7cf00000, 0x3c800000, RTL_GIGA_MAC_VER_18 }, { 0x7c800000, 0x3c800000, RTL_GIGA_MAC_VER_24 }, { 0x7cf00000, 0x3c000000, RTL_GIGA_MAC_VER_19 }, { 0x7cf00000, 0x3c200000, RTL_GIGA_MAC_VER_20 }, { 0x7cf00000, 0x3c300000, RTL_GIGA_MAC_VER_21 }, - { 0x7cf00000, 0x3c400000, RTL_GIGA_MAC_VER_22 }, { 0x7c800000, 0x3c000000, RTL_GIGA_MAC_VER_22 }, /* 8168B family. */ { 0x7cf00000, 0x38000000, RTL_GIGA_MAC_VER_12 }, - { 0x7cf00000, 0x38500000, RTL_GIGA_MAC_VER_17 }, { 0x7c800000, 0x38000000, RTL_GIGA_MAC_VER_17 }, { 0x7c800000, 0x30000000, RTL_GIGA_MAC_VER_11 }, /* 8101 family. */ - { 0x7cf00000, 0x44900000, RTL_GIGA_MAC_VER_39 }, { 0x7c800000, 0x44800000, RTL_GIGA_MAC_VER_39 }, { 0x7c800000, 0x44000000, RTL_GIGA_MAC_VER_37 }, - { 0x7cf00000, 0x40b00000, RTL_GIGA_MAC_VER_30 }, - { 0x7cf00000, 0x40a00000, RTL_GIGA_MAC_VER_30 }, { 0x7cf00000, 0x40900000, RTL_GIGA_MAC_VER_29 }, { 0x7c800000, 0x40800000, RTL_GIGA_MAC_VER_30 }, - { 0x7cf00000, 0x34a00000, RTL_GIGA_MAC_VER_09 }, - { 0x7cf00000, 0x24a00000, RTL_GIGA_MAC_VER_09 }, { 0x7cf00000, 0x34900000, RTL_GIGA_MAC_VER_08 }, { 0x7cf00000, 0x24900000, RTL_GIGA_MAC_VER_08 }, { 0x7cf00000, 0x34800000, RTL_GIGA_MAC_VER_07 }, @@ -5357,12 +5311,9 @@ static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp) (InterFrameGap << TxInterFrameGapShift)); } -static void rtl_hw_start(struct net_device *dev) +static void rtl_hw_start(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - - tp->hw_start(dev); - + tp->hw_start(tp); rtl_irq_enable_all(tp); } @@ -5388,10 +5339,10 @@ static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp) return cmd; } -static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz) +static void rtl_set_rx_max_size(struct rtl8169_private *tp) { /* Low hurts. Let's disable the filtering. */ - RTL_W16(tp, RxMaxSize, rx_buf_sz + 1); + RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1); } static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version) @@ -5471,14 +5422,11 @@ static void rtl_set_rx_mode(struct net_device *dev) RTL_W32(tp, RxConfig, tmp); } -static void rtl_hw_start_8169(struct net_device *dev) +static void rtl_hw_start_8169(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - struct pci_dev *pdev = tp->pci_dev; - if (tp->mac_version == RTL_GIGA_MAC_VER_05) { RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW); - pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08); + pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); } RTL_W8(tp, Cfg9346, Cfg9346_Unlock); @@ -5492,7 +5440,7 @@ static void rtl_hw_start_8169(struct net_device *dev) RTL_W8(tp, EarlyTxThres, NoEarlyTx); - rtl_set_rx_max_size(tp, rx_buf_sz); + rtl_set_rx_max_size(tp); if (tp->mac_version == RTL_GIGA_MAC_VER_01 || tp->mac_version == RTL_GIGA_MAC_VER_02 || @@ -5536,7 +5484,7 @@ static void rtl_hw_start_8169(struct net_device *dev) RTL_W32(tp, RxMissed, 0); - rtl_set_rx_mode(dev); + rtl_set_rx_mode(tp->dev); /* no early-rx interrupts */ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); @@ -6324,15 +6272,13 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp) r8168_mac_ocp_write(tp, 0xe860, data); } -static void rtl_hw_start_8168(struct net_device *dev) +static void rtl_hw_start_8168(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_set_rx_max_size(tp, rx_buf_sz); + rtl_set_rx_max_size(tp); tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1; @@ -6452,7 +6398,7 @@ static void rtl_hw_start_8168(struct net_device *dev) default: printk(KERN_ERR PFX "%s: unknown chipset (mac_version = %d).\n", - dev->name, tp->mac_version); + tp->dev->name, tp->mac_version); break; } @@ -6460,7 +6406,7 @@ static void rtl_hw_start_8168(struct net_device *dev) RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); - rtl_set_rx_mode(dev); + rtl_set_rx_mode(tp->dev); RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); } @@ -6599,24 +6545,21 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) rtl_pcie_state_l2l3_enable(tp, false); } -static void rtl_hw_start_8101(struct net_device *dev) +static void rtl_hw_start_8101(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - struct pci_dev *pdev = tp->pci_dev; - if (tp->mac_version >= RTL_GIGA_MAC_VER_30) tp->event_slow &= ~RxFIFOOver; if (tp->mac_version == RTL_GIGA_MAC_VER_13 || tp->mac_version == RTL_GIGA_MAC_VER_16) - pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, + pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_NOSNOOP_EN); RTL_W8(tp, Cfg9346, Cfg9346_Unlock); RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_set_rx_max_size(tp, rx_buf_sz); + rtl_set_rx_max_size(tp); tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK; RTL_W16(tp, CPlusCmd, tp->cp_cmd); @@ -6667,7 +6610,7 @@ static void rtl_hw_start_8101(struct net_device *dev) RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); - rtl_set_rx_mode(dev); + rtl_set_rx_mode(tp->dev); RTL_R8(tp, IntrMask); @@ -6698,29 +6641,22 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc) static void rtl8169_free_rx_databuff(struct rtl8169_private *tp, void **data_buff, struct RxDesc *desc) { - dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), rx_buf_sz, - DMA_FROM_DEVICE); + dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), + R8169_RX_BUF_SIZE, DMA_FROM_DEVICE); kfree(*data_buff); *data_buff = NULL; rtl8169_make_unusable_by_asic(desc); } -static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz) +static inline void rtl8169_mark_to_asic(struct RxDesc *desc) { u32 eor = le32_to_cpu(desc->opts1) & RingEnd; /* Force memory writes to complete before releasing descriptor */ dma_wmb(); - desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz); -} - -static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping, - u32 rx_buf_sz) -{ - desc->addr = cpu_to_le64(mapping); - rtl8169_mark_to_asic(desc, rx_buf_sz); + desc->opts1 = cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE); } static inline void *rtl8169_align(void *data) @@ -6734,21 +6670,20 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp, void *data; dma_addr_t mapping; struct device *d = tp_to_dev(tp); - struct net_device *dev = tp->dev; - int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; + int node = dev_to_node(d); - data = kmalloc_node(rx_buf_sz, GFP_KERNEL, node); + data = kmalloc_node(R8169_RX_BUF_SIZE, GFP_KERNEL, node); if (!data) return NULL; if (rtl8169_align(data) != data) { kfree(data); - data = kmalloc_node(rx_buf_sz + 15, GFP_KERNEL, node); + data = kmalloc_node(R8169_RX_BUF_SIZE + 15, GFP_KERNEL, node); if (!data) return NULL; } - mapping = dma_map_single(d, rtl8169_align(data), rx_buf_sz, + mapping = dma_map_single(d, rtl8169_align(data), R8169_RX_BUF_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(d, mapping))) { if (net_ratelimit()) @@ -6756,7 +6691,8 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp, goto err_out; } - rtl8169_map_to_asic(desc, mapping, rx_buf_sz); + desc->addr = cpu_to_le64(mapping); + rtl8169_mark_to_asic(desc); return data; err_out: @@ -6788,9 +6724,6 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp) for (i = 0; i < NUM_RX_DESC; i++) { void *data; - if (tp->Rx_databuff[i]) - continue; - data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i); if (!data) { rtl8169_make_unusable_by_asic(tp->RxDescArray + i); @@ -6807,14 +6740,12 @@ err_out: return -ENOMEM; } -static int rtl8169_init_ring(struct net_device *dev) +static int rtl8169_init_ring(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - rtl8169_init_ring_indexes(tp); - memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info)); - memset(tp->Rx_databuff, 0x0, NUM_RX_DESC * sizeof(void *)); + memset(tp->tx_skb, 0, sizeof(tp->tx_skb)); + memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff)); return rtl8169_rx_fill(tp); } @@ -6873,13 +6804,13 @@ static void rtl_reset_work(struct rtl8169_private *tp) rtl8169_hw_reset(tp); for (i = 0; i < NUM_RX_DESC; i++) - rtl8169_mark_to_asic(tp->RxDescArray + i, rx_buf_sz); + rtl8169_mark_to_asic(tp->RxDescArray + i); rtl8169_tx_clear(tp); rtl8169_init_ring_indexes(tp); napi_enable(&tp->napi); - rtl_hw_start(dev); + rtl_hw_start(tp); netif_wake_queue(dev); rtl8169_check_link_status(dev, tp); } @@ -7361,7 +7292,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data, prefetch(data); skb = napi_alloc_skb(&tp->napi, pkt_size); if (skb) - memcpy(skb->data, data, pkt_size); + skb_copy_to_linear_data(skb, data, pkt_size); dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); return skb; @@ -7379,7 +7310,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget struct RxDesc *desc = tp->RxDescArray + entry; u32 status; - status = le32_to_cpu(desc->opts1) & tp->opts1_mask; + status = le32_to_cpu(desc->opts1); if (status & DescOwn) break; @@ -7397,14 +7328,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget dev->stats.rx_length_errors++; if (status & RxCRC) dev->stats.rx_crc_errors++; - if (status & RxFOVF) { + /* RxFOVF is a reserved bit on later chip versions */ + if (tp->mac_version == RTL_GIGA_MAC_VER_01 && + status & RxFOVF) { rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); dev->stats.rx_fifo_errors++; - } - if ((status & (RxRUNT | RxCRC)) && - !(status & (RxRWT | RxFOVF)) && - (dev->features & NETIF_F_RXALL)) + } else if (status & (RxRUNT | RxCRC) && + !(status & RxRWT) && + dev->features & NETIF_F_RXALL) { goto process_pkt; + } } else { struct sk_buff *skb; dma_addr_t addr; @@ -7453,7 +7386,7 @@ process_pkt: } release_descriptor: desc->opts2 = 0; - rtl8169_mark_to_asic(desc, rx_buf_sz); + rtl8169_mark_to_asic(desc); } count = cur_rx - tp->cur_rx; @@ -7464,8 +7397,7 @@ release_descriptor: static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) { - struct net_device *dev = dev_instance; - struct rtl8169_private *tp = netdev_priv(dev); + struct rtl8169_private *tp = dev_instance; int handled = 0; u16 status; @@ -7476,7 +7408,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) handled = 1; rtl_irq_disable(tp); - napi_schedule(&tp->napi); + napi_schedule_irqoff(&tp->napi); } } return IRQ_RETVAL(handled); @@ -7627,7 +7559,7 @@ static int rtl8169_close(struct net_device *dev) pm_runtime_get_sync(&pdev->dev); /* Update counters before going down */ - rtl8169_update_counters(dev); + rtl8169_update_counters(tp); rtl_lock_work(tp); clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); @@ -7637,7 +7569,7 @@ static int rtl8169_close(struct net_device *dev) cancel_work_sync(&tp->wk.work); - pci_free_irq(pdev, 0, dev); + pci_free_irq(pdev, 0, tp); dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, tp->RxPhyAddr); @@ -7682,7 +7614,7 @@ static int rtl_open(struct net_device *dev) if (!tp->RxDescArray) goto err_free_tx_0; - retval = rtl8169_init_ring(dev); + retval = rtl8169_init_ring(tp); if (retval < 0) goto err_free_rx_1; @@ -7692,7 +7624,7 @@ static int rtl_open(struct net_device *dev) rtl_request_firmware(tp); - retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev, + retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp, dev->name); if (retval < 0) goto err_release_fw_2; @@ -7709,9 +7641,9 @@ static int rtl_open(struct net_device *dev) rtl_pll_power_up(tp); - rtl_hw_start(dev); + rtl_hw_start(tp); - if (!rtl8169_init_counter_offsets(dev)) + if (!rtl8169_init_counter_offsets(tp)) netif_warn(tp, hw, dev, "counter reset/update failed\n"); netif_start_queue(dev); @@ -7780,7 +7712,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) * from tally counters. */ if (pm_runtime_active(&pdev->dev)) - rtl8169_update_counters(dev); + rtl8169_update_counters(tp); /* * Subtract values fetched during initalization. @@ -7876,7 +7808,7 @@ static int rtl8169_runtime_suspend(struct device *device) /* Update counters before going runtime suspend */ rtl8169_rx_missed(dev); - rtl8169_update_counters(dev); + rtl8169_update_counters(tp); return 0; } @@ -8016,9 +7948,7 @@ static const struct net_device_ops rtl_netdev_ops = { }; static const struct rtl_cfg_info { - void (*hw_start)(struct net_device *); - unsigned int region; - unsigned int align; + void (*hw_start)(struct rtl8169_private *tp); u16 event_slow; unsigned int has_gmii:1; const struct rtl_coalesce_info *coalesce_info; @@ -8026,8 +7956,6 @@ static const struct rtl_cfg_info { } rtl_cfg_infos [] = { [RTL_CFG_0] = { .hw_start = rtl_hw_start_8169, - .region = 1, - .align = 0, .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver, .has_gmii = 1, .coalesce_info = rtl_coalesce_info_8169, @@ -8035,8 +7963,6 @@ static const struct rtl_cfg_info { }, [RTL_CFG_1] = { .hw_start = rtl_hw_start_8168, - .region = 2, - .align = 8, .event_slow = SYSErr | LinkChg | RxOverflow, .has_gmii = 1, .coalesce_info = rtl_coalesce_info_8168_8136, @@ -8044,8 +7970,6 @@ static const struct rtl_cfg_info { }, [RTL_CFG_2] = { .hw_start = rtl_hw_start_8101, - .region = 2, - .align = 8, .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver | PCSTimeout, .coalesce_info = rtl_coalesce_info_8168_8136, @@ -8145,11 +8069,10 @@ static void rtl_hw_initialize(struct rtl8169_private *tp) static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; - const unsigned int region = cfg->region; struct rtl8169_private *tp; struct mii_if_info *mii; struct net_device *dev; - int chipset, i; + int chipset, region, i; int rc; if (netif_msg_drv(&debug)) { @@ -8191,11 +8114,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (pcim_set_mwi(pdev) < 0) netif_info(tp, probe, dev, "Mem-Wr-Inval unavailable\n"); - /* make sure PCI base addr 1 is MMIO */ - if (!(pci_resource_flags(pdev, region) & IORESOURCE_MEM)) { - netif_err(tp, probe, dev, - "region #%d not an MMIO resource, aborting\n", - region); + /* use first MMIO region */ + region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1; + if (region < 0) { + netif_err(tp, probe, dev, "no MMIO resource found\n"); return -ENODEV; } @@ -8260,7 +8182,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl8169_print_mac_version(tp); chipset = tp->mac_version; - tp->txd_version = rtl_chip_infos[chipset].txd_version; rc = rtl_alloc_irq(tp); if (rc < 0) { @@ -8322,7 +8243,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &rtl8169_ethtool_ops; dev->watchdog_timeo = RTL8169_TX_TIMEOUT; - netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT); + netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT); /* don't enable SG, IP_CSUM and TSO by default - it might not work * properly for all devices */ @@ -8345,13 +8266,17 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disallow toggling */ dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; - if (tp->txd_version == RTL_TD_0) + switch (rtl_chip_infos[chipset].txd_version) { + case RTL_TD_0: tp->tso_csum = rtl8169_tso_csum_v1; - else if (tp->txd_version == RTL_TD_1) { + break; + case RTL_TD_1: tp->tso_csum = rtl8169_tso_csum_v2; dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; - } else + break; + default: WARN_ON_ONCE(1); + } dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; @@ -8364,9 +8289,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->event_slow = cfg->event_slow; tp->coalesce_info = cfg->coalesce_info; - tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ? - ~(RxBOVF | RxFOVF) : ~0; - timer_setup(&tp->timer, rtl8169_phy_timer, 0); tp->rtl_fw = RTL_FIRMWARE_UNKNOWN; @@ -8383,15 +8305,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc < 0) return rc; - netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n", - rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr, - (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff), + netif_info(tp, probe, dev, "%s, %pM, XID %08x, IRQ %d\n", + rtl_chip_infos[chipset].name, dev->dev_addr, + (u32)(RTL_R32(tp, TxConfig) & 0xfcf0f8ff), pci_irq_vector(pdev, 0)); if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) { netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, " "tx checksumming: %s]\n", rtl_chip_infos[chipset].jumbo_max, - rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko"); + tp->mac_version <= RTL_GIGA_MAC_VER_06 ? "ok" : "ko"); } if (r8168_check_dash(tp)) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 692dd729ee2a..c30b9e26f131 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1551,6 +1551,38 @@ static int efx_probe_interrupts(struct efx_nic *efx) return 0; } +#if defined(CONFIG_SMP) +static void efx_set_interrupt_affinity(struct efx_nic *efx) +{ + struct efx_channel *channel; + unsigned int cpu; + + efx_for_each_channel(channel, efx) { + cpu = cpumask_local_spread(channel->channel, + pcibus_to_node(efx->pci_dev->bus)); + irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); + } +} + +static void efx_clear_interrupt_affinity(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + irq_set_affinity_hint(channel->irq, NULL); +} +#else +static void +efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) +{ +} + +static void +efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) +{ +} +#endif /* CONFIG_SMP */ + static int efx_soft_enable_interrupts(struct efx_nic *efx) { struct efx_channel *channel, *end_channel; @@ -3165,6 +3197,7 @@ static void efx_pci_remove_main(struct efx_nic *efx) cancel_work_sync(&efx->reset_work); efx_disable_interrupts(efx); + efx_clear_interrupt_affinity(efx); efx_nic_fini_interrupt(efx); efx_fini_port(efx); efx->type->fini(efx); @@ -3314,6 +3347,8 @@ static int efx_pci_probe_main(struct efx_nic *efx) rc = efx_nic_init_interrupt(efx); if (rc) goto fail5; + + efx_set_interrupt_affinity(efx); rc = efx_enable_interrupts(efx); if (rc) goto fail6; @@ -3321,6 +3356,7 @@ static int efx_pci_probe_main(struct efx_nic *efx) return 0; fail6: + efx_clear_interrupt_affinity(efx); efx_nic_fini_interrupt(efx); fail5: efx_fini_port(efx); diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig index 6bcfe27fc560..b80048ca82a0 100644 --- a/drivers/net/ethernet/socionext/Kconfig +++ b/drivers/net/ethernet/socionext/Kconfig @@ -14,6 +14,8 @@ if NET_VENDOR_SOCIONEXT config SNI_AVE tristate "Socionext AVE ethernet support" depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF + depends on HAS_IOMEM + select MFD_SYSCON select PHYLIB ---help--- Driver for gigabit ethernet MACs, called AVE, in the diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 0b3b7a460641..f7ecceeb1e28 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -11,6 +11,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/iopoll.h> +#include <linux/mfd/syscon.h> #include <linux/mii.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -18,6 +19,7 @@ #include <linux/of_mdio.h> #include <linux/of_platform.h> #include <linux/phy.h> +#include <linux/regmap.h> #include <linux/reset.h> #include <linux/types.h> #include <linux/u64_stats_sync.h> @@ -197,8 +199,16 @@ #define AVE_INTM_COUNT 20 #define AVE_FORCE_TXINTCNT 1 +/* SG */ +#define SG_ETPINMODE 0x540 +#define SG_ETPINMODE_EXTPHY BIT(1) /* for LD11 */ +#define SG_ETPINMODE_RMII(ins) BIT(ins) + #define IS_DESC_64BIT(p) ((p)->data->is_desc_64bit) +#define AVE_MAX_CLKS 4 +#define AVE_MAX_RSTS 2 + enum desc_id { AVE_DESCID_RX, AVE_DESCID_TX, @@ -225,10 +235,6 @@ struct ave_desc_info { struct ave_desc *desc; /* skb info related descriptor */ }; -struct ave_soc_data { - bool is_desc_64bit; -}; - struct ave_stats { struct u64_stats_sync syncp; u64 packets; @@ -245,11 +251,16 @@ struct ave_private { int phy_id; unsigned int desc_size; u32 msg_enable; - struct clk *clk; - struct reset_control *rst; + int nclks; + struct clk *clk[AVE_MAX_CLKS]; + int nrsts; + struct reset_control *rst[AVE_MAX_RSTS]; phy_interface_t phy_mode; struct phy_device *phydev; struct mii_bus *mdio; + struct regmap *regmap; + unsigned int pinmode_mask; + unsigned int pinmode_val; /* stats */ struct ave_stats stats_rx; @@ -272,6 +283,14 @@ struct ave_private { const struct ave_soc_data *data; }; +struct ave_soc_data { + bool is_desc_64bit; + const char *clock_names[AVE_MAX_CLKS]; + const char *reset_names[AVE_MAX_RSTS]; + int (*get_pinmode)(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg); +}; + static u32 ave_desc_read(struct net_device *ndev, enum desc_id id, int entry, int offset) { @@ -1153,20 +1172,30 @@ static int ave_init(struct net_device *ndev) struct device_node *np = dev->of_node; struct device_node *mdio_np; struct phy_device *phydev; - int ret; + int nc, nr, ret; /* enable clk because of hw access until ndo_open */ - ret = clk_prepare_enable(priv->clk); - if (ret) { - dev_err(dev, "can't enable clock\n"); - return ret; + for (nc = 0; nc < priv->nclks; nc++) { + ret = clk_prepare_enable(priv->clk[nc]); + if (ret) { + dev_err(dev, "can't enable clock\n"); + goto out_clk_disable; + } } - ret = reset_control_deassert(priv->rst); - if (ret) { - dev_err(dev, "can't deassert reset\n"); - goto out_clk_disable; + + for (nr = 0; nr < priv->nrsts; nr++) { + ret = reset_control_deassert(priv->rst[nr]); + if (ret) { + dev_err(dev, "can't deassert reset\n"); + goto out_reset_assert; + } } + ret = regmap_update_bits(priv->regmap, SG_ETPINMODE, + priv->pinmode_mask, priv->pinmode_val); + if (ret) + return ret; + ave_global_reset(ndev); mdio_np = of_get_child_by_name(np, "mdio"); @@ -1207,9 +1236,11 @@ static int ave_init(struct net_device *ndev) out_mdio_unregister: mdiobus_unregister(priv->mdio); out_reset_assert: - reset_control_assert(priv->rst); + while (--nr >= 0) + reset_control_assert(priv->rst[nr]); out_clk_disable: - clk_disable_unprepare(priv->clk); + while (--nc >= 0) + clk_disable_unprepare(priv->clk[nc]); return ret; } @@ -1217,13 +1248,16 @@ out_clk_disable: static void ave_uninit(struct net_device *ndev) { struct ave_private *priv = netdev_priv(ndev); + int i; phy_disconnect(priv->phydev); mdiobus_unregister(priv->mdio); /* disable clk because of hw access after ndo_stop */ - reset_control_assert(priv->rst); - clk_disable_unprepare(priv->clk); + for (i = 0; i < priv->nrsts; i++) + reset_control_assert(priv->rst[i]); + for (i = 0; i < priv->nclks; i++) + clk_disable_unprepare(priv->clk[i]); } static int ave_open(struct net_device *ndev) @@ -1520,6 +1554,7 @@ static int ave_probe(struct platform_device *pdev) const struct ave_soc_data *data; struct device *dev = &pdev->dev; char buf[ETHTOOL_FWVERS_LEN]; + struct of_phandle_args args; phy_interface_t phy_mode; struct ave_private *priv; struct net_device *ndev; @@ -1527,8 +1562,9 @@ static int ave_probe(struct platform_device *pdev) struct resource *res; const void *mac_addr; void __iomem *base; + const char *name; + int i, irq, ret; u64 dma_mask; - int irq, ret; u32 ave_id; data = of_device_get_match_data(dev); @@ -1541,12 +1577,6 @@ static int ave_probe(struct platform_device *pdev) dev_err(dev, "phy-mode not found\n"); return -EINVAL; } - if ((!phy_interface_mode_is_rgmii(phy_mode)) && - phy_mode != PHY_INTERFACE_MODE_RMII && - phy_mode != PHY_INTERFACE_MODE_MII) { - dev_err(dev, "phy-mode is invalid\n"); - return -EINVAL; - } irq = platform_get_irq(pdev, 0); if (irq < 0) { @@ -1614,15 +1644,47 @@ static int ave_probe(struct platform_device *pdev) u64_stats_init(&priv->stats_tx.syncp); u64_stats_init(&priv->stats_rx.syncp); - priv->clk = devm_clk_get(dev, NULL); - if (IS_ERR(priv->clk)) { - ret = PTR_ERR(priv->clk); - goto out_free_netdev; + for (i = 0; i < AVE_MAX_CLKS; i++) { + name = priv->data->clock_names[i]; + if (!name) + break; + priv->clk[i] = devm_clk_get(dev, name); + if (IS_ERR(priv->clk[i])) { + ret = PTR_ERR(priv->clk[i]); + goto out_free_netdev; + } + priv->nclks++; + } + + for (i = 0; i < AVE_MAX_RSTS; i++) { + name = priv->data->reset_names[i]; + if (!name) + break; + priv->rst[i] = devm_reset_control_get_shared(dev, name); + if (IS_ERR(priv->rst[i])) { + ret = PTR_ERR(priv->rst[i]); + goto out_free_netdev; + } + priv->nrsts++; } - priv->rst = devm_reset_control_get_optional_shared(dev, NULL); - if (IS_ERR(priv->rst)) { - ret = PTR_ERR(priv->rst); + ret = of_parse_phandle_with_fixed_args(np, + "socionext,syscon-phy-mode", + 1, 0, &args); + if (ret) { + netdev_err(ndev, "can't get syscon-phy-mode property\n"); + goto out_free_netdev; + } + priv->regmap = syscon_node_to_regmap(args.np); + of_node_put(args.np); + if (IS_ERR(priv->regmap)) { + netdev_err(ndev, "can't map syscon-phy-mode\n"); + ret = PTR_ERR(priv->regmap); + goto out_free_netdev; + } + ret = priv->data->get_pinmode(priv, phy_mode, args.args[0]); + if (ret) { + netdev_err(ndev, "invalid phy-mode setting\n"); goto out_free_netdev; } @@ -1685,24 +1747,148 @@ static int ave_remove(struct platform_device *pdev) return 0; } +static int ave_pro4_get_pinmode(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg) +{ + if (arg > 0) + return -EINVAL; + + priv->pinmode_mask = SG_ETPINMODE_RMII(0); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_RMII: + priv->pinmode_val = SG_ETPINMODE_RMII(0); + break; + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_RGMII: + priv->pinmode_val = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ave_ld11_get_pinmode(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg) +{ + if (arg > 0) + return -EINVAL; + + priv->pinmode_mask = SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_INTERNAL: + priv->pinmode_val = 0; + break; + case PHY_INTERFACE_MODE_RMII: + priv->pinmode_val = SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ave_ld20_get_pinmode(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg) +{ + if (arg > 0) + return -EINVAL; + + priv->pinmode_mask = SG_ETPINMODE_RMII(0); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_RMII: + priv->pinmode_val = SG_ETPINMODE_RMII(0); + break; + case PHY_INTERFACE_MODE_RGMII: + priv->pinmode_val = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ave_pxs3_get_pinmode(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg) +{ + if (arg > 1) + return -EINVAL; + + priv->pinmode_mask = SG_ETPINMODE_RMII(arg); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_RMII: + priv->pinmode_val = SG_ETPINMODE_RMII(arg); + break; + case PHY_INTERFACE_MODE_RGMII: + priv->pinmode_val = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + static const struct ave_soc_data ave_pro4_data = { .is_desc_64bit = false, + .clock_names = { + "gio", "ether", "ether-gb", "ether-phy", + }, + .reset_names = { + "gio", "ether", + }, + .get_pinmode = ave_pro4_get_pinmode, }; static const struct ave_soc_data ave_pxs2_data = { .is_desc_64bit = false, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_pro4_get_pinmode, }; static const struct ave_soc_data ave_ld11_data = { .is_desc_64bit = false, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_ld11_get_pinmode, }; static const struct ave_soc_data ave_ld20_data = { .is_desc_64bit = true, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_ld20_get_pinmode, }; static const struct ave_soc_data ave_pxs3_data = { .is_desc_64bit = false, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_pxs3_get_pinmode, }; static const struct of_device_id of_ave_match[] = { diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index 8900a6fad318..c4ffdf47bad5 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -33,6 +33,8 @@ #define SGMII_LINK_MAC_MAC_FORCED 2 #define SGMII_LINK_MAC_FIBER 3 #define SGMII_LINK_MAC_PHY_NO_MDIO 4 +#define RGMII_LINK_MAC_PHY 5 +#define RGMII_LINK_MAC_PHY_NO_MDIO 7 #define XGMII_LINK_MAC_PHY 10 #define XGMII_LINK_MAC_MAC_FORCED 11 @@ -212,6 +214,7 @@ struct netcp_module { int (*add_vid)(void *intf_priv, int vid); int (*del_vid)(void *intf_priv, int vid); int (*ioctl)(void *intf_priv, struct ifreq *req, int cmd); + int (*set_rx_mode)(void *intf_priv, bool promisc); /* used internally */ struct list_head module_list; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index f5a7eb22d0f5..e40aa3e31af2 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1509,6 +1509,24 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp) } } +static int netcp_set_promiscuous(struct netcp_intf *netcp, bool promisc) +{ + struct netcp_intf_modpriv *priv; + struct netcp_module *module; + int error; + + for_each_module(netcp, priv) { + module = priv->netcp_module; + if (!module->set_rx_mode) + continue; + + error = module->set_rx_mode(priv->module_priv, promisc); + if (error) + return error; + } + return 0; +} + static void netcp_set_rx_mode(struct net_device *ndev) { struct netcp_intf *netcp = netdev_priv(ndev); @@ -1538,6 +1556,7 @@ static void netcp_set_rx_mode(struct net_device *ndev) /* finally sweep and callout into modules */ netcp_addr_sweep_del(netcp); netcp_addr_sweep_add(netcp); + netcp_set_promiscuous(netcp, promisc); spin_unlock(&netcp->lock); } @@ -2155,8 +2174,13 @@ static int netcp_probe(struct platform_device *pdev) struct device_node *child, *interfaces; struct netcp_device *netcp_device; struct device *dev = &pdev->dev; + struct netcp_module *module; int ret; + if (!knav_dma_device_ready() || + !knav_qmss_device_ready()) + return -EPROBE_DEFER; + if (!node) { dev_err(dev, "could not find device info\n"); return -ENODEV; @@ -2203,6 +2227,14 @@ static int netcp_probe(struct platform_device *pdev) /* Add the device instance to the list */ list_add_tail(&netcp_device->device_list, &netcp_devices); + /* Probe & attach any modules already registered */ + mutex_lock(&netcp_modules_lock); + for_each_netcp_module(module) { + ret = netcp_module_probe(netcp_device, module); + if (ret < 0) + dev_err(dev, "module(%s) probe failed\n", module->name); + } + mutex_unlock(&netcp_modules_lock); return 0; probe_quit_interface: diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 56dbc0b9fedc..6a728d35e776 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -21,6 +21,7 @@ #include <linux/io.h> #include <linux/module.h> #include <linux/of_mdio.h> +#include <linux/of_net.h> #include <linux/of_address.h> #include <linux/if_vlan.h> #include <linux/ptp_classify.h> @@ -42,7 +43,7 @@ /* 1G Ethernet SS defines */ #define GBE_MODULE_NAME "netcp-gbe" -#define GBE_SS_VERSION_14 0x4ed21104 +#define GBE_SS_VERSION_14 0x4ed2 #define GBE_SS_REG_INDEX 0 #define GBE_SGMII34_REG_INDEX 1 @@ -72,6 +73,11 @@ #define IS_SS_ID_NU(d) \ (GBE_IDENT((d)->ss_version) == GBE_SS_ID_NU) +#define IS_SS_ID_VER_14(d) \ + (GBE_IDENT((d)->ss_version) == GBE_SS_VERSION_14) +#define IS_SS_ID_2U(d) \ + (GBE_IDENT((d)->ss_version) == GBE_SS_ID_2U) + #define GBENU_SS_REG_INDEX 0 #define GBENU_SM_REG_INDEX 1 #define GBENU_SGMII_MODULE_OFFSET 0x100 @@ -86,7 +92,7 @@ /* 10G Ethernet SS defines */ #define XGBE_MODULE_NAME "netcp-xgbe" -#define XGBE_SS_VERSION_10 0x4ee42100 +#define XGBE_SS_VERSION_10 0x4ee4 #define XGBE_SS_REG_INDEX 0 #define XGBE_SM_REG_INDEX 1 @@ -166,6 +172,11 @@ #define GBE_RXHOOK_ORDER 0 #define GBE_DEFAULT_ALE_AGEOUT 30 #define SLAVE_LINK_IS_XGMII(s) ((s)->link_interface >= XGMII_LINK_MAC_PHY) +#define SLAVE_LINK_IS_RGMII(s) \ + (((s)->link_interface >= RGMII_LINK_MAC_PHY) && \ + ((s)->link_interface <= RGMII_LINK_MAC_PHY_NO_MDIO)) +#define SLAVE_LINK_IS_SGMII(s) \ + ((s)->link_interface <= SGMII_LINK_MAC_PHY_NO_MDIO) #define NETCP_LINK_STATE_INVALID -1 #define GBE_SET_REG_OFS(p, rb, rn) p->rb##_ofs.rn = \ @@ -549,6 +560,7 @@ struct gbe_ss_regs { struct gbe_ss_regs_ofs { u16 id_ver; u16 control; + u16 rgmii_status; /* 2U */ }; struct gbe_switch_regs { @@ -591,6 +603,7 @@ struct gbe_port_regs { struct gbe_port_regs_ofs { u16 port_vlan; u16 tx_pri_map; + u16 rx_pri_map; u16 sa_lo; u16 sa_hi; u16 ts_ctl; @@ -695,6 +708,7 @@ struct gbe_slave { u32 link_interface; u32 mac_control; u8 phy_port_t; + struct device_node *node; struct device_node *phy_node; struct ts_ctl ts_ctl; struct list_head slave_list; @@ -1915,7 +1929,7 @@ static void keystone_get_ethtool_stats(struct net_device *ndev, gbe_dev = gbe_intf->gbe_dev; spin_lock_bh(&gbe_dev->hw_stats_lock); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) gbe_update_stats_ver14(gbe_dev, data); else gbe_update_stats(gbe_dev, data); @@ -2091,8 +2105,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev, ALE_PORT_STATE_FORWARD); if (ndev && slave->open && - slave->link_interface != SGMII_LINK_MAC_PHY && - slave->link_interface != XGMII_LINK_MAC_PHY) + ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && + (slave->link_interface != XGMII_LINK_MAC_PHY))) netif_carrier_on(ndev); } else { writel(mac_control, GBE_REG_ADDR(slave, emac_regs, @@ -2101,8 +2116,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); if (ndev && - slave->link_interface != SGMII_LINK_MAC_PHY && - slave->link_interface != XGMII_LINK_MAC_PHY) + ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && + (slave->link_interface != XGMII_LINK_MAC_PHY))) netif_carrier_off(ndev); } @@ -2115,23 +2131,35 @@ static bool gbe_phy_link_status(struct gbe_slave *slave) return !slave->phy || slave->phy->link; } +#define RGMII_REG_STATUS_LINK BIT(0) + +static void netcp_2u_rgmii_get_port_link(struct gbe_priv *gbe_dev, bool *status) +{ + u32 val = 0; + + val = readl(GBE_REG_ADDR(gbe_dev, ss_regs, rgmii_status)); + *status = !!(val & RGMII_REG_STATUS_LINK); +} + static void netcp_ethss_update_link_state(struct gbe_priv *gbe_dev, struct gbe_slave *slave, struct net_device *ndev) { - int sp = slave->slave_num; - int phy_link_state, sgmii_link_state = 1, link_state; + bool sw_link_state = true, phy_link_state; + int sp = slave->slave_num, link_state; if (!slave->open) return; - if (!SLAVE_LINK_IS_XGMII(slave)) { - sgmii_link_state = - netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp); - } + if (SLAVE_LINK_IS_RGMII(slave)) + netcp_2u_rgmii_get_port_link(gbe_dev, + &sw_link_state); + if (SLAVE_LINK_IS_SGMII(slave)) + sw_link_state = + netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp); phy_link_state = gbe_phy_link_status(slave); - link_state = phy_link_state & sgmii_link_state; + link_state = phy_link_state & sw_link_state; if (atomic_xchg(&slave->link_state, link_state) != link_state) netcp_ethss_link_state_action(gbe_dev, ndev, slave, @@ -2205,7 +2233,7 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave, max_rx_len = NETCP_MAX_FRAME_SIZE; /* Enable correct MII mode at SS level */ - if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) && + if (IS_SS_ID_XGBE(gbe_dev) && (slave->link_interface >= XGMII_LINK_MAC_PHY)) { xgmii_mode = readl(GBE_REG_ADDR(gbe_dev, ss_regs, control)); xgmii_mode |= (1 << slave->slave_num); @@ -2236,7 +2264,8 @@ static void gbe_slave_stop(struct gbe_intf *intf) struct gbe_priv *gbe_dev = intf->gbe_dev; struct gbe_slave *slave = intf->slave; - gbe_sgmii_rtreset(gbe_dev, slave, true); + if (!IS_SS_ID_2U(gbe_dev)) + gbe_sgmii_rtreset(gbe_dev, slave, true); gbe_port_reset(slave); /* Disable forwarding */ cpsw_ale_control_set(gbe_dev->ale, slave->port_num, @@ -2271,11 +2300,20 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) void (*hndlr)(struct net_device *) = gbe_adjust_link; - gbe_sgmii_config(priv, slave); + if (!IS_SS_ID_2U(priv)) + gbe_sgmii_config(priv, slave); gbe_port_reset(slave); - gbe_sgmii_rtreset(priv, slave, false); + if (!IS_SS_ID_2U(priv)) + gbe_sgmii_rtreset(priv, slave, false); gbe_port_config(priv, slave, priv->rx_packet_max); gbe_set_slave_mac(slave, gbe_intf); + /* For NU & 2U switch, map the vlan priorities to zero + * as we only configure to use priority 0 + */ + if (IS_SS_ID_MU(priv)) + writel(HOST_TX_PRI_MAP_DEFAULT, + GBE_REG_ADDR(slave, port_regs, rx_pri_map)); + /* enable forwarding */ cpsw_ale_control_set(priv->ale, slave->port_num, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); @@ -2286,6 +2324,21 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) has_phy = true; phy_mode = PHY_INTERFACE_MODE_SGMII; slave->phy_port_t = PORT_MII; + } else if (slave->link_interface == RGMII_LINK_MAC_PHY) { + has_phy = true; + phy_mode = of_get_phy_mode(slave->node); + /* if phy-mode is not present, default to + * PHY_INTERFACE_MODE_RGMII + */ + if (phy_mode < 0) + phy_mode = PHY_INTERFACE_MODE_RGMII; + + if (!phy_interface_mode_is_rgmii(phy_mode)) { + dev_err(priv->dev, + "Unsupported phy mode %d\n", phy_mode); + return -EINVAL; + } + slave->phy_port_t = PORT_MII; } else if (slave->link_interface == XGMII_LINK_MAC_PHY) { has_phy = true; phy_mode = PHY_INTERFACE_MODE_NA; @@ -2293,7 +2346,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) } if (has_phy) { - if (priv->ss_version == XGBE_SS_VERSION_10) + if (IS_SS_ID_XGBE(priv)) hndlr = xgbe_adjust_link; slave->phy = of_phy_connect(gbe_intf->ndev, @@ -2722,6 +2775,61 @@ static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *req) } #endif /* CONFIG_TI_CPTS */ +static int gbe_set_rx_mode(void *intf_priv, bool promisc) +{ + struct gbe_intf *gbe_intf = intf_priv; + struct gbe_priv *gbe_dev = gbe_intf->gbe_dev; + struct cpsw_ale *ale = gbe_dev->ale; + unsigned long timeout; + int i, ret = -ETIMEDOUT; + + /* Disable(1)/Enable(0) Learn for all ports (host is port 0 and + * slaves are port 1 and up + */ + for (i = 0; i <= gbe_dev->num_slaves; i++) { + cpsw_ale_control_set(ale, i, + ALE_PORT_NOLEARN, !!promisc); + cpsw_ale_control_set(ale, i, + ALE_PORT_NO_SA_UPDATE, !!promisc); + } + + if (!promisc) { + /* Don't Flood All Unicast Packets to Host port */ + cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0); + dev_vdbg(gbe_dev->dev, "promiscuous mode disabled\n"); + return 0; + } + + timeout = jiffies + HZ; + + /* Clear All Untouched entries */ + cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1); + do { + cpu_relax(); + if (cpsw_ale_control_get(ale, 0, ALE_AGEOUT)) { + ret = 0; + break; + } + + } while (time_after(timeout, jiffies)); + + /* Make sure it is not a false timeout */ + if (ret && !cpsw_ale_control_get(ale, 0, ALE_AGEOUT)) + return ret; + + cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1); + + /* Clear all mcast from ALE */ + cpsw_ale_flush_multicast(ale, + GBE_PORT_MASK(gbe_dev->ale_ports), + -1); + + /* Flood All Unicast Packets to Host port */ + cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1); + dev_vdbg(gbe_dev->dev, "promiscuous mode enabled\n"); + return ret; +} + static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd) { struct gbe_intf *gbe_intf = intf_priv; @@ -2764,7 +2872,7 @@ static void netcp_ethss_timer(struct timer_list *t) /* A timer runs as a BH, no need to block them */ spin_lock(&gbe_dev->hw_stats_lock); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) gbe_update_stats_ver14(gbe_dev, NULL); else gbe_update_stats(gbe_dev, NULL); @@ -2807,7 +2915,7 @@ static int gbe_open(void *intf_priv, struct net_device *ndev) GBE_RTL_VERSION(reg), GBE_IDENT(reg)); /* For 10G and on NetCP 1.5, use directed to port */ - if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) || IS_SS_ID_MU(gbe_dev)) + if (IS_SS_ID_XGBE(gbe_dev) || IS_SS_ID_MU(gbe_dev)) gbe_intf->tx_pipe.flags = SWITCH_TO_PORT_IN_TAGINFO; if (gbe_dev->enable_ale) @@ -2911,8 +3019,10 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, slave->link_interface = SGMII_LINK_MAC_PHY; } + slave->node = node; slave->open = false; if ((slave->link_interface == SGMII_LINK_MAC_PHY) || + (slave->link_interface == RGMII_LINK_MAC_PHY) || (slave->link_interface == XGMII_LINK_MAC_PHY)) slave->phy_node = of_parse_phandle(node, "phy-handle", 0); slave->port_num = gbe_get_slave_port(gbe_dev, slave->slave_num); @@ -2924,7 +3034,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, /* Emac regs memmap are contiguous but port regs are not */ port_reg_num = slave->slave_num; - if (gbe_dev->ss_version == GBE_SS_VERSION_14) { + if (IS_SS_ID_VER_14(gbe_dev)) { if (slave->slave_num > 1) { port_reg_ofs = GBE13_SLAVE_PORT2_OFFSET; port_reg_num -= 2; @@ -2939,7 +3049,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, emac_reg_ofs = GBENU_EMAC_OFFSET; port_reg_blk_sz = 0x1000; emac_reg_blk_sz = 0x1000; - } else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) { + } else if (IS_SS_ID_XGBE(gbe_dev)) { port_reg_ofs = XGBE10_SLAVE_PORT_OFFSET; emac_reg_ofs = XGBE10_EMAC_OFFSET; port_reg_blk_sz = 0x30; @@ -2955,7 +3065,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, slave->emac_regs = gbe_dev->switch_regs + emac_reg_ofs + (emac_reg_blk_sz * slave->slave_num); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) { + if (IS_SS_ID_VER_14(gbe_dev)) { /* Initialize slave port register offsets */ GBE_SET_REG_OFS(slave, port_regs, port_vlan); GBE_SET_REG_OFS(slave, port_regs, tx_pri_map); @@ -2976,6 +3086,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, /* Initialize slave port register offsets */ GBENU_SET_REG_OFS(slave, port_regs, port_vlan); GBENU_SET_REG_OFS(slave, port_regs, tx_pri_map); + GBENU_SET_REG_OFS(slave, port_regs, rx_pri_map); GBENU_SET_REG_OFS(slave, port_regs, sa_lo); GBENU_SET_REG_OFS(slave, port_regs, sa_hi); GBENU_SET_REG_OFS(slave, port_regs, ts_ctl); @@ -2989,7 +3100,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, GBENU_SET_REG_OFS(slave, emac_regs, mac_control); GBENU_SET_REG_OFS(slave, emac_regs, soft_reset); - } else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) { + } else if (IS_SS_ID_XGBE(gbe_dev)) { /* Initialize slave port register offsets */ XGBE_SET_REG_OFS(slave, port_regs, port_vlan); XGBE_SET_REG_OFS(slave, port_regs, tx_pri_map); @@ -3039,7 +3150,8 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, continue; } - gbe_sgmii_config(gbe_dev, slave); + if (!IS_SS_ID_2U(gbe_dev)) + gbe_sgmii_config(gbe_dev, slave); gbe_port_reset(slave); gbe_port_config(gbe_dev, slave, gbe_dev->rx_packet_max); list_add_tail(&slave->slave_list, &gbe_dev->secondary_slaves); @@ -3073,6 +3185,9 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, if (slave->link_interface == SGMII_LINK_MAC_PHY) { phy_mode = PHY_INTERFACE_MODE_SGMII; slave->phy_port_t = PORT_MII; + } else if (slave->link_interface == RGMII_LINK_MAC_PHY) { + phy_mode = PHY_INTERFACE_MODE_RGMII; + slave->phy_port_t = PORT_MII; } else { phy_mode = PHY_INTERFACE_MODE_NA; slave->phy_port_t = PORT_FIBRE; @@ -3080,6 +3195,7 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, for_each_sec_slave(slave, gbe_dev) { if ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && (slave->link_interface != XGMII_LINK_MAC_PHY)) continue; slave->phy = @@ -3355,7 +3471,7 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, gbe_dev->num_stats_mods = gbe_dev->max_num_ports; gbe_dev->et_stats = gbenu_et_stats; - if (IS_SS_ID_NU(gbe_dev)) + if (IS_SS_ID_MU(gbe_dev)) gbe_dev->num_et_stats = GBENU_ET_STATS_HOST_SIZE + (gbe_dev->max_num_slaves * GBENU_ET_STATS_PORT_SIZE); else @@ -3396,7 +3512,9 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, } gbe_dev->switch_regs = regs; - gbe_dev->sgmii_port_regs = gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET; + if (!IS_SS_ID_2U(gbe_dev)) + gbe_dev->sgmii_port_regs = + gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET; /* Although sgmii modules are mem mapped to one contiguous * region on GBENU devices, setting sgmii_port34_regs allows @@ -3419,6 +3537,8 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, /* Subsystem registers */ GBENU_SET_REG_OFS(gbe_dev, ss_regs, id_ver); + /* ok to set for MU, but used by 2U only */ + GBENU_SET_REG_OFS(gbe_dev, ss_regs, rgmii_status); /* Switch module registers */ GBENU_SET_REG_OFS(gbe_dev, switch_regs, id_ver); @@ -3464,6 +3584,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, gbe_dev->max_num_slaves = 8; } else if (of_device_is_compatible(node, "ti,netcp-gbe-2")) { gbe_dev->max_num_slaves = 1; + gbe_module.set_rx_mode = gbe_set_rx_mode; } else if (of_device_is_compatible(node, "ti,netcp-xgbe")) { gbe_dev->max_num_slaves = 2; } else { @@ -3508,7 +3629,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, dev_dbg(dev, "ss_version: 0x%08x\n", gbe_dev->ss_version); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) ret = set_gbe_ethss14_priv(gbe_dev, node); else if (IS_SS_ID_MU(gbe_dev)) ret = set_gbenu_ethss_priv(gbe_dev, node); @@ -3606,7 +3727,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, spin_lock_bh(&gbe_dev->hw_stats_lock); for (i = 0; i < gbe_dev->num_stats_mods; i++) { - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) gbe_reset_mod_stats_ver14(gbe_dev, i); else gbe_reset_mod_stats(gbe_dev, i); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index b919e89a9b93..750eaa53bf0c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -36,6 +36,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); #define GENEVE_VER 0 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) +#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) +#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) /* per-network namespace private data for this module */ struct geneve_net { @@ -826,8 +828,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, return PTR_ERR(rt); if (skb_dst(skb)) { - int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - - GENEVE_BASE_HLEN - info->options_len - 14; + int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN - + info->options_len; skb_dst_update_pmtu(skb, mtu); } @@ -872,8 +874,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, return PTR_ERR(dst); if (skb_dst(skb)) { - int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - - GENEVE_BASE_HLEN - info->options_len - 14; + int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len; skb_dst_update_pmtu(skb, mtu); } @@ -941,11 +942,10 @@ tx_error: static int geneve_change_mtu(struct net_device *dev, int new_mtu) { - /* Only possible if called internally, ndo_change_mtu path's new_mtu - * is guaranteed to be between dev->min_mtu and dev->max_mtu. - */ if (new_mtu > dev->max_mtu) new_mtu = dev->max_mtu; + else if (new_mtu < dev->min_mtu) + new_mtu = dev->min_mtu; dev->mtu = new_mtu; return 0; @@ -1261,7 +1261,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], } if (data[IFLA_GENEVE_REMOTE6]) { - #if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { attrtype = IFLA_GENEVE_REMOTE6; goto change_notsup; @@ -1387,6 +1387,48 @@ change_notsup: return -EOPNOTSUPP; } +static void geneve_link_config(struct net_device *dev, + struct ip_tunnel_info *info, struct nlattr *tb[]) +{ + struct geneve_dev *geneve = netdev_priv(dev); + int ldev_mtu = 0; + + if (tb[IFLA_MTU]) { + geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + return; + } + + switch (ip_tunnel_info_af(info)) { + case AF_INET: { + struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; + struct rtable *rt = ip_route_output_key(geneve->net, &fl4); + + if (!IS_ERR(rt) && rt->dst.dev) { + ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; + ip_rt_put(rt); + } + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { + struct rt6_info *rt = rt6_lookup(geneve->net, + &info->key.u.ipv6.dst, NULL, 0, + NULL, 0); + + if (rt && rt->dst.dev) + ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; + ip6_rt_put(rt); + break; + } +#endif + } + + if (ldev_mtu <= 0) + return; + + geneve_change_mtu(dev, ldev_mtu - info->options_len); +} + static int geneve_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1402,8 +1444,14 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (err) return err; - return geneve_configure(net, dev, extack, &info, metadata, - use_udp6_rx_checksums); + err = geneve_configure(net, dev, extack, &info, metadata, + use_udp6_rx_checksums); + if (err) + return err; + + geneve_link_config(dev, &info, tb); + + return 0; } /* Quiesces the geneve device data path for both TX and RX. @@ -1477,8 +1525,10 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], if (err) return err; - if (!geneve_dst_addr_equal(&geneve->info, &info)) + if (!geneve_dst_addr_equal(&geneve->info, &info)) { dst_cache_reset(&info.dst_cache); + geneve_link_config(dev, &info, tb); + } geneve_quiesce(geneve, &gs4, &gs6); geneve->info = info; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 960f06141472..6ebe39a3dde6 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -237,6 +237,8 @@ void netvsc_switch_datapath(struct net_device *nv_dev, bool vf); #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 +#define NVSP_PROTOCOL_VERSION_6 0x60000 +#define NVSP_PROTOCOL_VERSION_61 0x60001 enum { NVSP_MSG_TYPE_NONE = 0, @@ -308,6 +310,12 @@ enum { NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, + + /* Version 6 messages */ + NVSP_MSG6_TYPE_PD_API, + NVSP_MSG6_TYPE_PD_POST_BATCH, + + NVSP_MSG6_MAX = NVSP_MSG6_TYPE_PD_POST_BATCH }; enum { @@ -619,12 +627,168 @@ union nvsp_5_message_uber { struct nvsp_5_send_indirect_table send_table; } __packed; +enum nvsp_6_pd_api_op { + PD_API_OP_CONFIG = 1, + PD_API_OP_SW_DATAPATH, /* Switch Datapath */ + PD_API_OP_OPEN_PROVIDER, + PD_API_OP_CLOSE_PROVIDER, + PD_API_OP_CREATE_QUEUE, + PD_API_OP_FLUSH_QUEUE, + PD_API_OP_FREE_QUEUE, + PD_API_OP_ALLOC_COM_BUF, /* Allocate Common Buffer */ + PD_API_OP_FREE_COM_BUF, /* Free Common Buffer */ + PD_API_OP_MAX +}; + +struct grp_affinity { + u64 mask; + u16 grp; + u16 reserved[3]; +} __packed; + +struct nvsp_6_pd_api_req { + u32 op; + + union { + /* MMIO information is sent from the VM to VSP */ + struct __packed { + u64 mmio_pa; /* MMIO Physical Address */ + u32 mmio_len; + + /* Number of PD queues a VM can support */ + u16 num_subchn; + } config; + + /* Switch Datapath */ + struct __packed { + /* Host Datapath Is PacketDirect */ + u8 host_dpath_is_pd; + + /* Guest PacketDirect Is Enabled */ + u8 guest_pd_enabled; + } sw_dpath; + + /* Open Provider*/ + struct __packed { + u32 prov_id; /* Provider id */ + u32 flag; + } open_prov; + + /* Close Provider */ + struct __packed { + u32 prov_id; + } cls_prov; + + /* Create Queue*/ + struct __packed { + u32 prov_id; + u16 q_id; + u16 q_size; + u8 is_recv_q; + u8 is_rss_q; + u32 recv_data_len; + struct grp_affinity affy; + } cr_q; + + /* Delete Queue*/ + struct __packed { + u32 prov_id; + u16 q_id; + } del_q; + + /* Flush Queue */ + struct __packed { + u32 prov_id; + u16 q_id; + } flush_q; + + /* Allocate Common Buffer */ + struct __packed { + u32 len; + u32 pf_node; /* Preferred Node */ + u16 region_id; + } alloc_com_buf; + + /* Free Common Buffer */ + struct __packed { + u32 len; + u64 pa; /* Physical Address */ + u32 pf_node; /* Preferred Node */ + u16 region_id; + u8 cache_type; + } free_com_buf; + } __packed; +} __packed; + +struct nvsp_6_pd_api_comp { + u32 op; + u32 status; + + union { + struct __packed { + /* actual number of PD queues allocated to the VM */ + u16 num_pd_q; + + /* Num Receive Rss PD Queues */ + u8 num_rss_q; + + u8 is_supported; /* Is supported by VSP */ + u8 is_enabled; /* Is enabled by VSP */ + } config; + + /* Open Provider */ + struct __packed { + u32 prov_id; + } open_prov; + + /* Create Queue */ + struct __packed { + u32 prov_id; + u16 q_id; + u16 q_size; + u32 recv_data_len; + struct grp_affinity affy; + } cr_q; + + /* Allocate Common Buffer */ + struct __packed { + u64 pa; /* Physical Address */ + u32 len; + u32 pf_node; /* Preferred Node */ + u16 region_id; + u8 cache_type; + } alloc_com_buf; + } __packed; +} __packed; + +struct nvsp_6_pd_buf { + u32 region_offset; + u16 region_id; + u16 is_partial:1; + u16 reserved:15; +} __packed; + +struct nvsp_6_pd_batch_msg { + struct nvsp_message_header hdr; + u16 count; + u16 guest2host:1; + u16 is_recv:1; + u16 reserved:14; + struct nvsp_6_pd_buf pd_buf[0]; +} __packed; + +union nvsp_6_message_uber { + struct nvsp_6_pd_api_req pd_req; + struct nvsp_6_pd_api_comp pd_comp; +} __packed; + union nvsp_all_messages { union nvsp_message_init_uber init_msg; union nvsp_1_message_uber v1_msg; union nvsp_2_message_uber v2_msg; union nvsp_4_message_uber v4_msg; union nvsp_5_message_uber v5_msg; + union nvsp_6_message_uber v6_msg; } __packed; /* ALL Messages */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 04f611e6f678..e7308958b7a9 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -525,7 +525,8 @@ static int netvsc_connect_vsp(struct hv_device *device, struct net_device *ndev = hv_get_drvdata(device); static const u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, - NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 + NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5, + NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61 }; struct nvsp_message *init_packet; int ndis_version, i, ret; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 6b127be781d9..3b6dbacaf77d 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -29,6 +29,7 @@ #include <linux/nls.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> +#include <linux/ucs2_string.h> #include "hyperv_net.h" #include "netvsc_trace.h" @@ -1223,6 +1224,29 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, return ret; } +static void rndis_get_friendly_name(struct net_device *net, + struct rndis_device *rndis_device, + struct netvsc_device *net_device) +{ + ucs2_char_t wname[256]; + unsigned long len; + u8 ifalias[256]; + u32 size; + + size = sizeof(wname); + if (rndis_filter_query_device(rndis_device, net_device, + RNDIS_OID_GEN_FRIENDLY_NAME, + wname, &size) != 0) + return; + + /* Convert Windows Unicode string to UTF-8 */ + len = ucs2_as_utf8(ifalias, wname, sizeof(ifalias)); + + /* ignore the default value from host */ + if (strcmp(ifalias, "Network Adapter") != 0) + dev_set_alias(net, ifalias, len); +} + struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct netvsc_device_info *device_info) { @@ -1276,6 +1300,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); + /* Get friendly name as ifalias*/ + if (!net->ifalias) + rndis_get_friendly_name(net, rndis_device, net_device); + /* Query and set hardware capabilities */ ret = rndis_netdev_set_hwcaps(rndis_device, net_device); if (ret != 0) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index bdfbabb86ee0..edb8b9ab827f 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -218,6 +218,12 @@ config AQUANTIA_PHY ---help--- Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 +config ASIX_PHY + tristate "Asix PHYs" + help + Currently supports the Asix Electronics PHY found in the X-Surf 100 + AX88796B package. + config AT803X_PHY tristate "AT803X PHYs" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 01acbcb2c798..701ca0b8717e 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -45,6 +45,7 @@ obj-y += $(sfp-obj-y) $(sfp-obj-m) obj-$(CONFIG_AMD_PHY) += amd.o obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o +obj-$(CONFIG_ASIX_PHY) += asix.o obj-$(CONFIG_AT803X_PHY) += at803x.o obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o diff --git a/drivers/net/phy/asix.c b/drivers/net/phy/asix.c new file mode 100644 index 000000000000..8ebe7f5484ae --- /dev/null +++ b/drivers/net/phy/asix.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Driver for Asix PHYs + * + * Author: Michael Schmitz <schmitzmic@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mii.h> +#include <linux/phy.h> + +#define PHY_ID_ASIX_AX88796B 0x003b1841 + +MODULE_DESCRIPTION("Asix PHY driver"); +MODULE_AUTHOR("Michael Schmitz <schmitzmic@gmail.com>"); +MODULE_LICENSE("GPL"); + +/** + * asix_soft_reset - software reset the PHY via BMCR_RESET bit + * @phydev: target phy_device struct + * + * Description: Perform a software PHY reset using the standard + * BMCR_RESET bit and poll for the reset bit to be cleared. + * Toggle BMCR_RESET bit off to accommodate broken AX8796B PHY implementation + * such as used on the Individual Computers' X-Surf 100 Zorro card. + * + * Returns: 0 on success, < 0 on failure + */ +static int asix_soft_reset(struct phy_device *phydev) +{ + int ret; + + /* Asix PHY won't reset unless reset bit toggles */ + ret = phy_write(phydev, MII_BMCR, 0); + if (ret < 0) + return ret; + + return genphy_soft_reset(phydev); +} + +static struct phy_driver asix_driver[] = { { + .phy_id = PHY_ID_ASIX_AX88796B, + .name = "Asix Electronics AX88796B", + .phy_id_mask = 0xfffffff0, + .features = PHY_BASIC_FEATURES, + .soft_reset = asix_soft_reset, +} }; + +module_phy_driver(asix_driver); + +static struct mdio_device_id __maybe_unused asix_tbl[] = { + { PHY_ID_ASIX_AX88796B, 0xfffffff0 }, + { } +}; + +MODULE_DEVICE_TABLE(mdio, asix_tbl); diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c index 403b085f0a89..15352f987bdf 100644 --- a/drivers/net/phy/mdio-bitbang.c +++ b/drivers/net/phy/mdio-bitbang.c @@ -205,14 +205,6 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) return 0; } -static int mdiobb_reset(struct mii_bus *bus) -{ - struct mdiobb_ctrl *ctrl = bus->priv; - if (ctrl->reset) - ctrl->reset(bus); - return 0; -} - struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl) { struct mii_bus *bus; @@ -225,7 +217,6 @@ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl) bus->read = mdiobb_read; bus->write = mdiobb_write; - bus->reset = mdiobb_reset; bus->priv = ctrl; return bus; diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c index 1861f387820d..863496fa5d13 100644 --- a/drivers/net/phy/mdio-boardinfo.c +++ b/drivers/net/phy/mdio-boardinfo.c @@ -30,17 +30,20 @@ void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus, struct mdio_board_info *bi)) { struct mdio_board_entry *be; + struct mdio_board_entry *tmp; struct mdio_board_info *bi; int ret; mutex_lock(&mdio_board_lock); - list_for_each_entry(be, &mdio_board_list, list) { + list_for_each_entry_safe(be, tmp, &mdio_board_list, list) { bi = &be->board_info; if (strcmp(bus->id, bi->bus_id)) continue; + mutex_unlock(&mdio_board_lock); ret = cb(bus, bi); + mutex_lock(&mdio_board_lock); if (ret) continue; diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 4333c6e14742..b501221819e1 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -24,8 +24,10 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/platform_device.h> +#include <linux/mdio-bitbang.h> +#include <linux/mdio-gpio.h> #include <linux/gpio.h> -#include <linux/platform_data/mdio-gpio.h> +#include <linux/gpio/consumer.h> #include <linux/of_gpio.h> #include <linux/of_mdio.h> @@ -35,37 +37,22 @@ struct mdio_gpio_info { struct gpio_desc *mdc, *mdio, *mdo; }; -static void *mdio_gpio_of_get_data(struct platform_device *pdev) +static int mdio_gpio_get_data(struct device *dev, + struct mdio_gpio_info *bitbang) { - struct device_node *np = pdev->dev.of_node; - struct mdio_gpio_platform_data *pdata; - enum of_gpio_flags flags; - int ret; - - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return NULL; - - ret = of_get_gpio_flags(np, 0, &flags); - if (ret < 0) - return NULL; - - pdata->mdc = ret; - pdata->mdc_active_low = flags & OF_GPIO_ACTIVE_LOW; - - ret = of_get_gpio_flags(np, 1, &flags); - if (ret < 0) - return NULL; - pdata->mdio = ret; - pdata->mdio_active_low = flags & OF_GPIO_ACTIVE_LOW; - - ret = of_get_gpio_flags(np, 2, &flags); - if (ret > 0) { - pdata->mdo = ret; - pdata->mdo_active_low = flags & OF_GPIO_ACTIVE_LOW; - } - - return pdata; + bitbang->mdc = devm_gpiod_get_index(dev, NULL, MDIO_GPIO_MDC, + GPIOD_OUT_LOW); + if (IS_ERR(bitbang->mdc)) + return PTR_ERR(bitbang->mdc); + + bitbang->mdio = devm_gpiod_get_index(dev, NULL, MDIO_GPIO_MDIO, + GPIOD_IN); + if (IS_ERR(bitbang->mdio)) + return PTR_ERR(bitbang->mdio); + + bitbang->mdo = devm_gpiod_get_index_optional(dev, NULL, MDIO_GPIO_MDO, + GPIOD_OUT_LOW); + return PTR_ERR_OR_ZERO(bitbang->mdo); } static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) @@ -125,78 +112,28 @@ static const struct mdiobb_ops mdio_gpio_ops = { }; static struct mii_bus *mdio_gpio_bus_init(struct device *dev, - struct mdio_gpio_platform_data *pdata, + struct mdio_gpio_info *bitbang, int bus_id) { struct mii_bus *new_bus; - struct mdio_gpio_info *bitbang; - int i; - int mdc, mdio, mdo; - unsigned long mdc_flags = GPIOF_OUT_INIT_LOW; - unsigned long mdio_flags = GPIOF_DIR_IN; - unsigned long mdo_flags = GPIOF_OUT_INIT_HIGH; - - bitbang = devm_kzalloc(dev, sizeof(*bitbang), GFP_KERNEL); - if (!bitbang) - goto out; bitbang->ctrl.ops = &mdio_gpio_ops; - bitbang->ctrl.reset = pdata->reset; - mdc = pdata->mdc; - bitbang->mdc = gpio_to_desc(mdc); - if (pdata->mdc_active_low) - mdc_flags = GPIOF_OUT_INIT_HIGH | GPIOF_ACTIVE_LOW; - mdio = pdata->mdio; - bitbang->mdio = gpio_to_desc(mdio); - if (pdata->mdio_active_low) - mdio_flags |= GPIOF_ACTIVE_LOW; - mdo = pdata->mdo; - if (mdo) { - bitbang->mdo = gpio_to_desc(mdo); - if (pdata->mdo_active_low) - mdo_flags = GPIOF_OUT_INIT_LOW | GPIOF_ACTIVE_LOW; - } new_bus = alloc_mdio_bitbang(&bitbang->ctrl); if (!new_bus) - goto out; - - new_bus->name = "GPIO Bitbanged MDIO", + return NULL; - new_bus->phy_mask = pdata->phy_mask; - new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask; - memcpy(new_bus->irq, pdata->irqs, sizeof(new_bus->irq)); + new_bus->name = "GPIO Bitbanged MDIO"; new_bus->parent = dev; - if (new_bus->phy_mask == ~0) - goto out_free_bus; - - for (i = 0; i < PHY_MAX_ADDR; i++) - if (!new_bus->irq[i]) - new_bus->irq[i] = PHY_POLL; - if (bus_id != -1) snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id); else strncpy(new_bus->id, "gpio", MII_BUS_ID_SIZE); - if (devm_gpio_request_one(dev, mdc, mdc_flags, "mdc")) - goto out_free_bus; - - if (devm_gpio_request_one(dev, mdio, mdio_flags, "mdio")) - goto out_free_bus; - - if (mdo && devm_gpio_request_one(dev, mdo, mdo_flags, "mdo")) - goto out_free_bus; - dev_set_drvdata(dev, new_bus); return new_bus; - -out_free_bus: - free_mdio_bitbang(new_bus); -out: - return NULL; } static void mdio_gpio_bus_deinit(struct device *dev) @@ -216,26 +153,29 @@ static void mdio_gpio_bus_destroy(struct device *dev) static int mdio_gpio_probe(struct platform_device *pdev) { - struct mdio_gpio_platform_data *pdata; + struct mdio_gpio_info *bitbang; struct mii_bus *new_bus; int ret, bus_id; + bitbang = devm_kzalloc(&pdev->dev, sizeof(*bitbang), GFP_KERNEL); + if (!bitbang) + return -ENOMEM; + + ret = mdio_gpio_get_data(&pdev->dev, bitbang); + if (ret) + return ret; + if (pdev->dev.of_node) { - pdata = mdio_gpio_of_get_data(pdev); bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio"); if (bus_id < 0) { dev_warn(&pdev->dev, "failed to get alias id\n"); bus_id = 0; } } else { - pdata = dev_get_platdata(&pdev->dev); bus_id = pdev->id; } - if (!pdata) - return -ENODEV; - - new_bus = mdio_gpio_bus_init(&pdev->dev, pdata, bus_id); + new_bus = mdio_gpio_bus_init(&pdev->dev, bitbang, bus_id); if (!new_bus) return -ENODEV; diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 0f293ef28935..ef5e160fe9dc 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -20,6 +20,8 @@ #include <linux/ethtool.h> #include <linux/phy.h> #include <linux/microchipphy.h> +#include <linux/of.h> +#include <dt-bindings/net/microchip-lan78xx.h> #define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>" #define DRIVER_DESC "Microchip LAN88XX PHY driver" @@ -70,6 +72,8 @@ static int lan88xx_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; struct lan88xx_priv *priv; + u32 led_modes[4]; + int len; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -77,6 +81,27 @@ static int lan88xx_probe(struct phy_device *phydev) priv->wolopts = 0; + len = of_property_read_variable_u32_array(dev->of_node, + "microchip,led-modes", + led_modes, + 0, + ARRAY_SIZE(led_modes)); + if (len >= 0) { + u32 reg = 0; + int i; + + for (i = 0; i < len; i++) { + if (led_modes[i] > 15) + return -EINVAL; + reg |= led_modes[i] << (i * 4); + } + for (; i < ARRAY_SIZE(led_modes); i++) + reg |= LAN78XX_FORCE_LED_OFF << (i * 4); + (void)phy_write(phydev, LAN78XX_PHY_LED_MODE_SELECT, reg); + } else if (len == -EOVERFLOW) { + return -EINVAL; + } + /* these values can be used to identify internal PHY */ priv->chip_id = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_ID); priv->chip_rev = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_REV); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a6c6ce19eeee..8a8611095ca0 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2918,7 +2918,7 @@ static int team_device_event(struct notifier_block *unused, case NETDEV_CHANGE: if (netif_running(port->dev)) team_port_change_check(port, - !!netif_carrier_ok(port->dev)); + !!netif_oper_up(port->dev)); break; case NETDEV_UNREGISTER: team_del_slave(port->team->dev, dev); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 901351a6ed21..11028d5f5e8d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -525,11 +525,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, rcu_read_lock(); - /* We may get a very small possibility of OOO during switching, not - * worth to optimize.*/ - if (tun->numqueues == 1 || tfile->detached) - goto unlock; - e = tun_flow_find(head, rxhash); if (likely(e)) { /* TODO: keep queueing to old queue until it's empty? */ @@ -548,7 +543,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, spin_unlock_bh(&tun->lock); } -unlock: rcu_read_unlock(); } @@ -1938,10 +1932,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, rcu_read_unlock(); } - rcu_read_lock(); - if (!rcu_dereference(tun->steering_prog)) + /* Compute the costly rx hash only if needed for flow updates. + * We may get a very small possibility of OOO during switching, not + * worth to optimize. + */ + if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 && + !tfile->detached) rxhash = __skb_get_hash_symmetric(skb); - rcu_read_unlock(); if (frags) { /* Exercise flow dissector code path. */ diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 0867f7275852..c59f8afd0d73 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -37,6 +37,8 @@ #include <linux/irqchip/chained_irq.h> #include <linux/microchipphy.h> #include <linux/phy.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> #include "lan78xx.h" #define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>" @@ -278,6 +280,30 @@ struct lan78xx_statstage64 { u64 eee_tx_lpi_time; }; +static u32 lan78xx_regs[] = { + ID_REV, + INT_STS, + HW_CFG, + PMT_CTL, + E2P_CMD, + E2P_DATA, + USB_STATUS, + VLAN_TYPE, + MAC_CR, + MAC_RX, + MAC_TX, + FLOW, + ERR_STS, + MII_ACC, + MII_DATA, + EEE_TX_LPI_REQ_DLY, + EEE_TW_TX_SYS, + EEE_TX_LPI_REM_DLY, + WUCSR +}; + +#define PHY_REG_SIZE (32 * sizeof(u32)) + struct lan78xx_net; struct lan78xx_priv { @@ -1605,6 +1631,34 @@ exit: return ret; } +static int lan78xx_get_regs_len(struct net_device *netdev) +{ + if (!netdev->phydev) + return (sizeof(lan78xx_regs)); + else + return (sizeof(lan78xx_regs) + PHY_REG_SIZE); +} + +static void +lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs, + void *buf) +{ + u32 *data = buf; + int i, j; + struct lan78xx_net *dev = netdev_priv(netdev); + + /* Read Device/MAC registers */ + for (i = 0; i < (sizeof(lan78xx_regs) / sizeof(u32)); i++) + lan78xx_read_reg(dev, lan78xx_regs[i], &data[i]); + + if (!netdev->phydev) + return; + + /* Read PHY registers */ + for (j = 0; j < 32; i++, j++) + data[i] = phy_read(netdev->phydev, j); +} + static const struct ethtool_ops lan78xx_ethtool_ops = { .get_link = lan78xx_get_link, .nway_reset = phy_ethtool_nway_reset, @@ -1625,6 +1679,8 @@ static const struct ethtool_ops lan78xx_ethtool_ops = { .set_pauseparam = lan78xx_set_pause, .get_link_ksettings = lan78xx_get_link_ksettings, .set_link_ksettings = lan78xx_set_link_ksettings, + .get_regs_len = lan78xx_get_regs_len, + .get_regs = lan78xx_get_regs, }; static int lan78xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) @@ -1652,34 +1708,31 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) addr[5] = (addr_hi >> 8) & 0xFF; if (!is_valid_ether_addr(addr)) { - /* reading mac address from EEPROM or OTP */ - if ((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - addr) == 0) || - (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - addr) == 0)) { - if (is_valid_ether_addr(addr)) { - /* eeprom values are valid so use them */ - netif_dbg(dev, ifup, dev->net, - "MAC address read from EEPROM"); - } else { - /* generate random MAC */ - random_ether_addr(addr); - netif_dbg(dev, ifup, dev->net, - "MAC address set to random addr"); - } - - addr_lo = addr[0] | (addr[1] << 8) | - (addr[2] << 16) | (addr[3] << 24); - addr_hi = addr[4] | (addr[5] << 8); - - ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); - ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + if (!eth_platform_get_mac_address(&dev->udev->dev, addr)) { + /* valid address present in Device Tree */ + netif_dbg(dev, ifup, dev->net, + "MAC address read from Device Tree"); + } else if (((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, + ETH_ALEN, addr) == 0) || + (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, + ETH_ALEN, addr) == 0)) && + is_valid_ether_addr(addr)) { + /* eeprom values are valid so use them */ + netif_dbg(dev, ifup, dev->net, + "MAC address read from EEPROM"); } else { /* generate random MAC */ random_ether_addr(addr); netif_dbg(dev, ifup, dev->net, "MAC address set to random addr"); } + + addr_lo = addr[0] | (addr[1] << 8) | + (addr[2] << 16) | (addr[3] << 24); + addr_hi = addr[4] | (addr[5] << 8); + + ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); + ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); } ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); @@ -1762,6 +1815,7 @@ done: static int lan78xx_mdio_init(struct lan78xx_net *dev) { + struct device_node *node; int ret; dev->mdiobus = mdiobus_alloc(); @@ -1790,7 +1844,13 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) break; } - ret = mdiobus_register(dev->mdiobus); + node = of_get_child_by_name(dev->udev->dev.of_node, "mdio"); + if (node) { + ret = of_mdiobus_register(dev->mdiobus, node); + of_node_put(node); + } else { + ret = mdiobus_register(dev->mdiobus); + } if (ret) { netdev_err(dev->net, "can't register MDIO bus\n"); goto exit1; @@ -2079,6 +2139,28 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) mii_adv = (u32)mii_advertise_flowctrl(dev->fc_request_control); phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv); + if (phydev->mdio.dev.of_node) { + u32 reg; + int len; + + len = of_property_count_elems_of_size(phydev->mdio.dev.of_node, + "microchip,led-modes", + sizeof(u32)); + if (len >= 0) { + /* Ensure the appropriate LEDs are enabled */ + lan78xx_read_reg(dev, HW_CFG, ®); + reg &= ~(HW_CFG_LED0_EN_ | + HW_CFG_LED1_EN_ | + HW_CFG_LED2_EN_ | + HW_CFG_LED3_EN_); + reg |= (len > 0) * HW_CFG_LED0_EN_ | + (len > 1) * HW_CFG_LED1_EN_ | + (len > 2) * HW_CFG_LED2_EN_ | + (len > 3) * HW_CFG_LED3_EN_; + lan78xx_write_reg(dev, HW_CFG, reg); + } + } + genphy_config_aneg(phydev); dev->fc_autoneg = phydev->autoneg; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 0a2b180d138a..90b5f3900c22 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -48,6 +48,9 @@ static unsigned int vrf_net_id; struct net_vrf { struct rtable __rcu *rth; struct rt6_info __rcu *rt6; +#if IS_ENABLED(CONFIG_IPV6) + struct fib6_table *fib6_table; +#endif u32 tb_id; }; @@ -496,7 +499,6 @@ static int vrf_rt6_create(struct net_device *dev) int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM; struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); - struct fib6_table *rt6i_table; struct rt6_info *rt6; int rc = -ENOMEM; @@ -504,8 +506,8 @@ static int vrf_rt6_create(struct net_device *dev) if (!ipv6_mod_enabled()) return 0; - rt6i_table = fib6_new_table(net, vrf->tb_id); - if (!rt6i_table) + vrf->fib6_table = fib6_new_table(net, vrf->tb_id); + if (!vrf->fib6_table) goto out; /* create a dst for routing packets out a VRF device */ @@ -513,7 +515,6 @@ static int vrf_rt6_create(struct net_device *dev) if (!rt6) goto out; - rt6->rt6i_table = rt6i_table; rt6->dst.output = vrf_output6; rcu_assign_pointer(vrf->rt6, rt6); @@ -946,22 +947,8 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, int flags) { struct net_vrf *vrf = netdev_priv(dev); - struct fib6_table *table = NULL; - struct rt6_info *rt6; - - rcu_read_lock(); - - /* fib6_table does not have a refcnt and can not be freed */ - rt6 = rcu_dereference(vrf->rt6); - if (likely(rt6)) - table = rt6->rt6i_table; - - rcu_read_unlock(); - - if (!table) - return NULL; - return ip6_pol_route(net, table, ifindex, fl6, skb, flags); + return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags); } static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index 026182d3b27c..224d7ddeeb76 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -134,6 +134,13 @@ struct knav_dma_chan { static struct knav_dma_pool_device *kdev; +static bool device_ready; +bool knav_dma_device_ready(void) +{ + return device_ready; +} +EXPORT_SYMBOL_GPL(knav_dma_device_ready); + static bool check_config(struct knav_dma_chan *chan, struct knav_dma_cfg *cfg) { if (!memcmp(&chan->cfg, cfg, sizeof(*cfg))) @@ -773,6 +780,7 @@ static int knav_dma_probe(struct platform_device *pdev) debugfs_create_file("knav_dma", S_IFREG | S_IRUGO, NULL, NULL, &knav_dma_debug_ops); + device_ready = true; return ret; } diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h index 905b974d1bdc..56866ba4cfc4 100644 --- a/drivers/soc/ti/knav_qmss.h +++ b/drivers/soc/ti/knav_qmss.h @@ -292,6 +292,11 @@ struct knav_queue { struct list_head list; }; +enum qmss_version { + QMSS, + QMSS_66AK2G, +}; + struct knav_device { struct device *dev; unsigned base_id; @@ -305,6 +310,7 @@ struct knav_device { struct list_head pools; struct list_head pdsps; struct list_head qmgrs; + enum qmss_version version; }; struct knav_range_ops { diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c index 77d6b5c03aae..419365a8d1c2 100644 --- a/drivers/soc/ti/knav_qmss_queue.c +++ b/drivers/soc/ti/knav_qmss_queue.c @@ -42,6 +42,15 @@ static DEFINE_MUTEX(knav_dev_lock); #define KNAV_QUEUE_PUSH_REG_INDEX 4 #define KNAV_QUEUE_POP_REG_INDEX 5 +/* Queue manager register indices in DTS for QMSS in K2G NAVSS. + * There are no status and vbusm push registers on this version + * of QMSS. Push registers are same as pop, So all indices above 1 + * are to be re-defined + */ +#define KNAV_L_QUEUE_CONFIG_REG_INDEX 1 +#define KNAV_L_QUEUE_REGION_REG_INDEX 2 +#define KNAV_L_QUEUE_PUSH_REG_INDEX 3 + /* PDSP register indices in DTS */ #define KNAV_QUEUE_PDSP_IRAM_REG_INDEX 0 #define KNAV_QUEUE_PDSP_REGS_REG_INDEX 1 @@ -65,6 +74,13 @@ static DEFINE_MUTEX(knav_dev_lock); */ const char *knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"}; +static bool device_ready; +bool knav_qmss_device_ready(void) +{ + return device_ready; +} +EXPORT_SYMBOL_GPL(knav_qmss_device_ready); + /** * knav_queue_notify: qmss queue notfier call * @@ -1169,8 +1185,12 @@ static int knav_queue_setup_link_ram(struct knav_device *kdev) dev_dbg(kdev->dev, "linkram0: dma:%pad, virt:%p, size:%x\n", &block->dma, block->virt, block->size); writel_relaxed((u32)block->dma, &qmgr->reg_config->link_ram_base0); - writel_relaxed(block->size, &qmgr->reg_config->link_ram_size0); - + if (kdev->version == QMSS_66AK2G) + writel_relaxed(block->size, + &qmgr->reg_config->link_ram_size0); + else + writel_relaxed(block->size - 1, + &qmgr->reg_config->link_ram_size0); block++; if (!block->size) continue; @@ -1387,42 +1407,64 @@ static int knav_queue_init_qmgrs(struct knav_device *kdev, qmgr->reg_peek = knav_queue_map_reg(kdev, child, KNAV_QUEUE_PEEK_REG_INDEX); - qmgr->reg_status = - knav_queue_map_reg(kdev, child, - KNAV_QUEUE_STATUS_REG_INDEX); + + if (kdev->version == QMSS) { + qmgr->reg_status = + knav_queue_map_reg(kdev, child, + KNAV_QUEUE_STATUS_REG_INDEX); + } + qmgr->reg_config = knav_queue_map_reg(kdev, child, + (kdev->version == QMSS_66AK2G) ? + KNAV_L_QUEUE_CONFIG_REG_INDEX : KNAV_QUEUE_CONFIG_REG_INDEX); qmgr->reg_region = knav_queue_map_reg(kdev, child, + (kdev->version == QMSS_66AK2G) ? + KNAV_L_QUEUE_REGION_REG_INDEX : KNAV_QUEUE_REGION_REG_INDEX); + qmgr->reg_push = knav_queue_map_reg(kdev, child, - KNAV_QUEUE_PUSH_REG_INDEX); - qmgr->reg_pop = - knav_queue_map_reg(kdev, child, - KNAV_QUEUE_POP_REG_INDEX); + (kdev->version == QMSS_66AK2G) ? + KNAV_L_QUEUE_PUSH_REG_INDEX : + KNAV_QUEUE_PUSH_REG_INDEX); + + if (kdev->version == QMSS) { + qmgr->reg_pop = + knav_queue_map_reg(kdev, child, + KNAV_QUEUE_POP_REG_INDEX); + } - if (IS_ERR(qmgr->reg_peek) || IS_ERR(qmgr->reg_status) || + if (IS_ERR(qmgr->reg_peek) || + ((kdev->version == QMSS) && + (IS_ERR(qmgr->reg_status) || IS_ERR(qmgr->reg_pop))) || IS_ERR(qmgr->reg_config) || IS_ERR(qmgr->reg_region) || - IS_ERR(qmgr->reg_push) || IS_ERR(qmgr->reg_pop)) { + IS_ERR(qmgr->reg_push)) { dev_err(dev, "failed to map qmgr regs\n"); + if (kdev->version == QMSS) { + if (!IS_ERR(qmgr->reg_status)) + devm_iounmap(dev, qmgr->reg_status); + if (!IS_ERR(qmgr->reg_pop)) + devm_iounmap(dev, qmgr->reg_pop); + } if (!IS_ERR(qmgr->reg_peek)) devm_iounmap(dev, qmgr->reg_peek); - if (!IS_ERR(qmgr->reg_status)) - devm_iounmap(dev, qmgr->reg_status); if (!IS_ERR(qmgr->reg_config)) devm_iounmap(dev, qmgr->reg_config); if (!IS_ERR(qmgr->reg_region)) devm_iounmap(dev, qmgr->reg_region); if (!IS_ERR(qmgr->reg_push)) devm_iounmap(dev, qmgr->reg_push); - if (!IS_ERR(qmgr->reg_pop)) - devm_iounmap(dev, qmgr->reg_pop); devm_kfree(dev, qmgr); continue; } + /* Use same push register for pop as well */ + if (kdev->version == QMSS_66AK2G) + qmgr->reg_pop = qmgr->reg_push; + list_add_tail(&qmgr->list, &kdev->qmgrs); dev_info(dev, "added qmgr start queue %d, num of queues %d, reg_peek %p, reg_status %p, reg_config %p, reg_region %p, reg_push %p, reg_pop %p\n", qmgr->start_queue, qmgr->num_queues, @@ -1681,10 +1723,24 @@ static int knav_queue_init_queues(struct knav_device *kdev) return 0; } +/* Match table for of_platform binding */ +static const struct of_device_id keystone_qmss_of_match[] = { + { + .compatible = "ti,keystone-navigator-qmss", + }, + { + .compatible = "ti,66ak2g-navss-qm", + .data = (void *)QMSS_66AK2G, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, keystone_qmss_of_match); + static int knav_queue_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct device_node *qmgrs, *queue_pools, *regions, *pdsps; + const struct of_device_id *match; struct device *dev = &pdev->dev; u32 temp[2]; int ret; @@ -1700,6 +1756,10 @@ static int knav_queue_probe(struct platform_device *pdev) return -ENOMEM; } + match = of_match_device(of_match_ptr(keystone_qmss_of_match), dev); + if (match && match->data) + kdev->version = QMSS_66AK2G; + platform_set_drvdata(pdev, kdev); kdev->dev = dev; INIT_LIST_HEAD(&kdev->queue_ranges); @@ -1796,6 +1856,7 @@ static int knav_queue_probe(struct platform_device *pdev) debugfs_create_file("qmss", S_IFREG | S_IRUGO, NULL, NULL, &knav_queue_debug_ops); + device_ready = true; return 0; err: @@ -1815,13 +1876,6 @@ static int knav_queue_remove(struct platform_device *pdev) return 0; } -/* Match table for of_platform binding */ -static struct of_device_id keystone_qmss_of_match[] = { - { .compatible = "ti,keystone-navigator-qmss", }, - {}, -}; -MODULE_DEVICE_TABLE(of, keystone_qmss_of_match); - static struct platform_driver keystone_qmss_driver = { .probe = knav_queue_probe, .remove = knav_queue_remove, diff --git a/include/dt-bindings/net/microchip-lan78xx.h b/include/dt-bindings/net/microchip-lan78xx.h new file mode 100644 index 000000000000..0742ff075307 --- /dev/null +++ b/include/dt-bindings/net/microchip-lan78xx.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DT_BINDINGS_MICROCHIP_LAN78XX_H +#define _DT_BINDINGS_MICROCHIP_LAN78XX_H + +/* LED modes for LAN7800/LAN7850 embedded PHY */ + +#define LAN78XX_LINK_ACTIVITY 0 +#define LAN78XX_LINK_1000_ACTIVITY 1 +#define LAN78XX_LINK_100_ACTIVITY 2 +#define LAN78XX_LINK_10_ACTIVITY 3 +#define LAN78XX_LINK_100_1000_ACTIVITY 4 +#define LAN78XX_LINK_10_1000_ACTIVITY 5 +#define LAN78XX_LINK_10_100_ACTIVITY 6 +#define LAN78XX_DUPLEX_COLLISION 8 +#define LAN78XX_COLLISION 9 +#define LAN78XX_ACTIVITY 10 +#define LAN78XX_AUTONEG_FAULT 12 +#define LAN78XX_FORCE_LED_OFF 14 +#define LAN78XX_FORCE_LED_ON 15 + +#endif diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h index a8ac9cfa014c..5d71e8a8500f 100644 --- a/include/linux/mdio-bitbang.h +++ b/include/linux/mdio-bitbang.h @@ -33,8 +33,6 @@ struct mdiobb_ops { struct mdiobb_ctrl { const struct mdiobb_ops *ops; - /* reset callback */ - int (*reset)(struct mii_bus *bus); }; /* The returned bus is not yet registered with the phy layer. */ diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h new file mode 100644 index 000000000000..cea443a672cb --- /dev/null +++ b/include/linux/mdio-gpio.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_MDIO_GPIO_H +#define __LINUX_MDIO_GPIO_H + +#define MDIO_GPIO_MDC 0 +#define MDIO_GPIO_MDIO 1 +#define MDIO_GPIO_MDO 2 + +#endif diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h index eb492d47f717..8e4015e7f8d3 100644 --- a/include/linux/microchipphy.h +++ b/include/linux/microchipphy.h @@ -70,4 +70,7 @@ #define LAN88XX_MMD3_CHIP_ID (32877) #define LAN88XX_MMD3_CHIP_REV (32878) +/* Registers specific to the LAN7800/LAN7850 embedded phy */ +#define LAN78XX_PHY_LED_MODE_SELECT (0x1D) + #endif /* _MICROCHIPPHY_H */ diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h deleted file mode 100644 index 11f00cdabe3d..000000000000 --- a/include/linux/platform_data/mdio-gpio.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * MDIO-GPIO bus platform data structures - * - * Copyright (C) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef __LINUX_MDIO_GPIO_H -#define __LINUX_MDIO_GPIO_H - -#include <linux/mdio-bitbang.h> - -struct mdio_gpio_platform_data { - /* GPIO numbers for bus pins */ - unsigned int mdc; - unsigned int mdio; - unsigned int mdo; - - bool mdc_active_low; - bool mdio_active_low; - bool mdo_active_low; - - u32 phy_mask; - u32 phy_ignore_ta_mask; - int irqs[PHY_MAX_ADDR]; - /* reset callback */ - int (*reset)(struct mii_bus *bus); -}; - -#endif /* __LINUX_MDIO_GPIO_H */ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 147d08ccf813..7f9756fe9180 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -352,6 +352,7 @@ struct qed_eth_ops { int (*configure_arfs_searcher)(struct qed_dev *cdev, enum qed_filter_config_mode mode); int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle); + int (*req_bulletin_update_mac)(struct qed_dev *cdev, u8 *mac); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9065477ed255..d274059529eb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3131,6 +3131,7 @@ static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len) return skb->data; } +int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim @@ -3144,9 +3145,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; - return __pskb_trim(skb, len); + return pskb_trim_rcsum_slow(skb, len); } static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index 66693bc4c6ad..7127ec301537 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -167,6 +167,8 @@ struct knav_dma_desc { void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config); void knav_dma_close_channel(void *channel); +int knav_dma_get_flow(void *channel); +bool knav_dma_device_ready(void); #else static inline void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config) @@ -176,6 +178,16 @@ static inline void *knav_dma_open_channel(struct device *dev, const char *name, static inline void knav_dma_close_channel(void *channel) {} +static inline int knav_dma_get_flow(void *channel) +{ + return -EINVAL; +} + +static inline bool knav_dma_device_ready(void) +{ + return false; +} + #endif #endif /* __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ */ diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h index 9f0ebb3bad27..9745df6ed9d3 100644 --- a/include/linux/soc/ti/knav_qmss.h +++ b/include/linux/soc/ti/knav_qmss.h @@ -86,5 +86,6 @@ int knav_pool_desc_map(void *ph, void *desc, unsigned size, void *knav_pool_desc_unmap(void *ph, dma_addr_t dma, unsigned dma_sz); dma_addr_t knav_pool_desc_virt_to_dma(void *ph, void *virt); void *knav_pool_desc_dma_to_virt(void *ph, dma_addr_t dma); +bool knav_qmss_device_ready(void); #endif /* __SOC_TI_KNAV_QMSS_H__ */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8f4c54986f97..20585d5c4e1c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -281,6 +281,7 @@ struct tcp_sock { * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 delivered_ce; /* Like the above but only ECE marked packets */ u32 lost; /* Total data packets lost incl. rexmits */ u32 app_limited; /* limited until "delivered" reaches this val */ u64 first_tx_mstamp; /* start of window send phase */ diff --git a/include/net/ax88796.h b/include/net/ax88796.h index b9a3beca0ce4..84b3785d0e66 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -12,6 +12,10 @@ #ifndef __NET_AX88796_PLAT_H #define __NET_AX88796_PLAT_H +struct sk_buff; +struct net_device; +struct platform_device; + #define AXFLG_HAS_EEPROM (1<<0) #define AXFLG_MAC_FROMDEV (1<<1) /* device already has MAC */ #define AXFLG_HAS_93CX6 (1<<2) /* use eeprom_93cx6 driver */ @@ -26,6 +30,16 @@ struct ax_plat_data { u32 *reg_offsets; /* register offsets */ u8 *mac_addr; /* MAC addr (only used when AXFLG_MAC_FROMPLATFORM is used */ + + /* uses default ax88796 buffer if set to NULL */ + void (*block_output)(struct net_device *dev, int count, + const unsigned char *buf, int star_page); + void (*block_input)(struct net_device *dev, int count, + struct sk_buff *skb, int ring_offset); + /* returns nonzero if a pending interrupt request might by caused by + * the ax88786. Handles all interrupts if set to NULL + */ + int (*check_irq)(struct platform_device *pdev); }; #endif /* __NET_AX88796_PLAT_H */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d4088d1a688d..db389253dc2a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,7 +64,7 @@ struct inet6_ifaddr { struct delayed_work dad_work; struct inet6_dev *idev; - struct rt6_info *rt; + struct fib6_info *rt; struct hlist_node addr_lst; struct list_head if_list; @@ -143,8 +143,7 @@ struct ipv6_ac_socklist { struct ifacaddr6 { struct in6_addr aca_addr; - struct inet6_dev *aca_idev; - struct rt6_info *aca_rt; + struct fib6_info *aca_rt; struct ifacaddr6 *aca_next; int aca_users; refcount_t aca_refcnt; diff --git a/include/net/ip.h b/include/net/ip.h index ecffd843e7b8..dc4a2d6e58a5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -396,6 +396,9 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } +int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, + u32 *metrics); + u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 5e86fd9dc857..642211174692 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -38,6 +38,7 @@ #endif struct rt6_info; +struct fib6_info; struct fib6_config { u32 fc_table; @@ -74,12 +75,12 @@ struct fib6_node { #ifdef CONFIG_IPV6_SUBTREES struct fib6_node __rcu *subtree; #endif - struct rt6_info __rcu *leaf; + struct fib6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; - struct rt6_info __rcu *rr_ptr; + struct fib6_info __rcu *rr_ptr; struct rcu_head rcu; }; @@ -94,11 +95,6 @@ struct fib6_gc_args { #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif -struct mx6_config { - const u32 *mx; - DECLARE_BITMAP(mx_valid, RTAX_MAX); -}; - /* * routing information * @@ -127,56 +123,71 @@ struct rt6_exception { #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_MAX_DEPTH 5 -struct rt6_info { - struct dst_entry dst; - struct rt6_info __rcu *rt6_next; - struct rt6_info *from; +struct fib6_nh { + struct in6_addr nh_gw; + struct net_device *nh_dev; + struct lwtunnel_state *nh_lwtstate; - /* - * Tail elements of dst_entry (__refcnt etc.) - * and these elements (rarely used in hot path) are in - * the same cache line. - */ - struct fib6_table *rt6i_table; - struct fib6_node __rcu *rt6i_node; + unsigned int nh_flags; + atomic_t nh_upper_bound; + int nh_weight; +}; - struct in6_addr rt6i_gateway; +struct fib6_info { + struct fib6_table *fib6_table; + struct fib6_info __rcu *rt6_next; + struct fib6_node __rcu *fib6_node; /* Multipath routes: - * siblings is a list of rt6_info that have the the same metric/weight, + * siblings is a list of fib6_info that have the the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ - struct list_head rt6i_siblings; - unsigned int rt6i_nsiblings; - atomic_t rt6i_nh_upper_bound; + struct list_head fib6_siblings; + unsigned int fib6_nsiblings; - atomic_t rt6i_ref; + atomic_t fib6_ref; + unsigned long expires; + struct dst_metrics *fib6_metrics; +#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] - unsigned int rt6i_nh_flags; + struct rt6key fib6_dst; + u32 fib6_flags; + struct rt6key fib6_src; + struct rt6key fib6_prefsrc; - /* These are in a separate cache line. */ - struct rt6key rt6i_dst ____cacheline_aligned_in_smp; - u32 rt6i_flags; + struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; + + u32 fib6_metric; + u8 fib6_protocol; + u8 fib6_type; + u8 exception_bucket_flushed:1, + should_flush:1, + dst_nocount:1, + dst_nopolicy:1, + dst_host:1, + unused:3; + + struct fib6_nh fib6_nh; +}; + +struct rt6_info { + struct dst_entry dst; + struct fib6_info *from; + + struct rt6key rt6i_dst; struct rt6key rt6i_src; + struct in6_addr rt6i_gateway; + struct inet6_dev *rt6i_idev; + u32 rt6i_flags; struct rt6key rt6i_prefsrc; struct list_head rt6i_uncached; struct uncached_list *rt6i_uncached_list; - struct inet6_dev *rt6i_idev; - struct rt6_info * __percpu *rt6i_pcpu; - struct rt6_exception_bucket __rcu *rt6i_exception_bucket; - - u32 rt6i_metric; - u32 rt6i_pmtu; /* more non-fragment space at head required */ - int rt6i_nh_weight; unsigned short rt6i_nfheader_len; - u8 rt6i_protocol; - u8 exception_bucket_flushed:1, - should_flush:1, - unused:6; }; #define for_each_fib6_node_rt_rcu(fn) \ @@ -192,6 +203,26 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) return ((struct rt6_info *)dst)->rt6i_idev; } +static inline void fib6_clean_expires(struct fib6_info *f6i) +{ + f6i->fib6_flags &= ~RTF_EXPIRES; + f6i->expires = 0; +} + +static inline void fib6_set_expires(struct fib6_info *f6i, + unsigned long expires) +{ + f6i->expires = expires; + f6i->fib6_flags |= RTF_EXPIRES; +} + +static inline bool fib6_check_expired(const struct fib6_info *f6i) +{ + if (f6i->fib6_flags & RTF_EXPIRES) + return time_after(jiffies, f6i->expires); + return false; +} + static inline void rt6_clean_expires(struct rt6_info *rt) { rt->rt6i_flags &= ~RTF_EXPIRES; @@ -206,11 +237,9 @@ static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) { - struct rt6_info *rt; + if (!(rt0->rt6i_flags & RTF_EXPIRES) && rt0->from) + rt0->dst.expires = rt0->from->expires; - for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from); - if (rt && rt != rt0) - rt0->dst.expires = rt->dst.expires; dst_set_expires(&rt0->dst, timeout); rt0->rt6i_flags |= RTF_EXPIRES; } @@ -220,14 +249,14 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) * Return true if we can get cookie safely * Return false if not */ -static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, +static inline bool rt6_get_cookie_safe(const struct fib6_info *f6i, u32 *cookie) { struct fib6_node *fn; bool status = false; rcu_read_lock(); - fn = rcu_dereference(rt->rt6i_node); + fn = rcu_dereference(f6i->fib6_node); if (fn) { *cookie = fn->fn_sernum; @@ -246,9 +275,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt) if (rt->rt6i_flags & RTF_PCPU || (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) - rt = rt->from; - - rt6_get_cookie_safe(rt, &cookie); + rt6_get_cookie_safe(rt->from, &cookie); return cookie; } @@ -262,20 +289,18 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } -void rt6_free_pcpu(struct rt6_info *non_pcpu_rt); +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags); +void fib6_info_destroy(struct fib6_info *f6i); -static inline void rt6_hold(struct rt6_info *rt) +static inline void fib6_info_hold(struct fib6_info *f6i) { - atomic_inc(&rt->rt6i_ref); + atomic_inc(&f6i->fib6_ref); } -static inline void rt6_release(struct rt6_info *rt) +static inline void fib6_info_release(struct fib6_info *f6i) { - if (atomic_dec_and_test(&rt->rt6i_ref)) { - rt6_free_pcpu(rt); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } + if (f6i && atomic_dec_and_test(&f6i->fib6_ref)) + fib6_info_destroy(f6i); } enum fib6_walk_state { @@ -291,7 +316,7 @@ enum fib6_walk_state { struct fib6_walker { struct list_head lh; struct fib6_node *root, *node; - struct rt6_info *leaf; + struct fib6_info *leaf; enum fib6_walk_state state; unsigned int skip; unsigned int count; @@ -355,7 +380,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ - struct rt6_info *rt; + struct fib6_info *rt; }; /* @@ -377,15 +402,19 @@ struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *saddr, int src_len, bool exact_match); -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), +void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, - struct netlink_ext_ack *extack); -int fib6_del(struct rt6_info *rt, struct nl_info *info); +int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack); +int fib6_del(struct fib6_info *rt, struct nl_info *info); -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) +{ + return f6i->fib6_nh.nh_dev; +} + +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); @@ -408,8 +437,14 @@ void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb); -void fib6_update_sernum(struct rt6_info *rt); -void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt); +void fib6_update_sernum(struct net *net, struct fib6_info *rt); +void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); + +void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); +static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) +{ + return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); +} #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 08b132381984..8df4ff798b04 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -66,9 +66,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } -static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) +static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { - return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == RTF_GATEWAY; } @@ -100,29 +100,29 @@ void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg); -int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); -int ip6_ins_rt(struct rt6_info *); -int ip6_del_rt(struct rt6_info *); +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +int ip6_ins_rt(struct net *net, struct fib6_info *f6i); +int ip6_del_rt(struct net *net, struct fib6_info *f6i); -void rt6_flush_exceptions(struct rt6_info *rt); -int rt6_remove_exception_rt(struct rt6_info *rt); -void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, +void rt6_flush_exceptions(struct fib6_info *f6i); +void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now); -static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, +static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { - struct inet6_dev *idev = - rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; int err = 0; - if (rt && rt->rt6i_prefsrc.plen) - *saddr = rt->rt6i_prefsrc.addr; - else - err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, - daddr, prefs, saddr); + if (f6i && f6i->fib6_prefsrc.plen) { + *saddr = f6i->fib6_prefsrc.addr; + } else { + struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + + err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); + } return err; } @@ -137,8 +137,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); void fib6_force_start_gc(struct net *net); -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, bool anycast); +struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev, + const struct in6_addr *addr, bool anycast, + gfp_t gfp_flags); struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags); @@ -147,9 +148,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, * support functions for ND * */ -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, +struct fib6_info *rt6_get_dflt_router(struct net *net, + const struct in6_addr *addr, struct net_device *dev); -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, + const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); void rt6_purge_dflt_routers(struct net *net); @@ -174,14 +177,14 @@ struct rt6_rtnl_dump_arg { struct net *net; }; -int rt6_dump_route(struct rt6_info *rt, void *p_arg); +int rt6_dump_route(struct fib6_info *f6i, void *p_arg); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); -void rt6_multipath_rebalance(struct rt6_info *rt); +void rt6_multipath_rebalance(struct fib6_info *f6i); void rt6_uncached_list_add(struct rt6_info *rt); void rt6_uncached_list_del(struct rt6_info *rt); @@ -269,12 +272,14 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, return daddr; } -static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) +static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->dst.dev == b->dst.dev && - a->rt6i_idev == b->rt6i_idev && - ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && - !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); + return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && + ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && + !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); } +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, + struct net_device *dev, struct sk_buff *skb, + const void *daddr); #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index c29f09cfc9d7..97b3a54579c8 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -60,7 +60,8 @@ struct netns_ipv6 { #endif struct xt_table *ip6table_nat; #endif - struct rt6_info *ip6_null_entry; + struct fib6_info *fib6_null_entry; + struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; struct timer_list ip6_fib_timer; struct hlist_head *fib_table_hash; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 33a70ece462f..d02e859301ff 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -276,6 +276,8 @@ enum LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */ LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */ LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */ + LINUX_MIB_TCPDELIVERED, /* TCPDelivered */ + LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 560374c978f9..379b08700a54 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -224,6 +224,9 @@ struct tcp_info { __u64 tcpi_busy_time; /* Time (usec) busy sending data */ __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ + + __u32 tcpi_delivered; + __u32 tcpi_delivered_ce; }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ @@ -244,6 +247,8 @@ enum { TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ TCP_NLA_CA_STATE, /* ca_state of socket */ TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */ + TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */ + TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */ }; diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 3f29e3c8ed06..4b2c93b1934c 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -185,6 +185,11 @@ #define TIPC_DEF_LINK_WIN 50 #define TIPC_MAX_LINK_WIN 8191 +/* + * Default MTU for UDP media + */ + +#define TIPC_DEF_LINK_UDP_MTU 14000 struct tipc_node_info { __be32 addr; /* network address of node */ diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0affb682e5e3..85c11982c89b 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -266,6 +266,7 @@ enum { TIPC_NLA_PROP_PRIO, /* u32 */ TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ + TIPC_NLA_PROP_MTU, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/net/core/dev.c b/net/core/dev.c index 11c789231a03..3e8ac127a7e1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1285,6 +1285,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return len; } +EXPORT_SYMBOL(dev_set_alias); /** * dev_get_alias - get ifalias of a device diff --git a/net/core/dst.c b/net/core/dst.c index 007aa0b08291..2d9b37f8944a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -58,6 +58,7 @@ const struct dst_metrics dst_default_metrics = { */ .refcnt = REFCOUNT_INIT(1), }; +EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 45936922d7e2..80802546c279 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -785,13 +785,15 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { - .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse), - .rta_used = dst->__use, - .rta_clntref = atomic_read(&(dst->__refcnt)), .rta_error = error, .rta_id = id, }; + if (dst) { + ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); + ci.rta_used = dst->__use; + ci.rta_clntref = atomic_read(&dst->__refcnt); + } if (expires) { unsigned long clock; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 345b51837ca8..ff49e352deea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1839,6 +1839,20 @@ done: } EXPORT_SYMBOL(___pskb_trim); +/* Note : use pskb_trim_rcsum() instead of calling this directly + */ +int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + int delta = skb->len - len; + + skb->csum = csum_sub(skb->csum, + skb_checksum(skb, len, delta, 0)); + } + return __pskb_trim(skb, len); +} +EXPORT_SYMBOL(pskb_trim_rcsum_slow); + /** * __pskb_pull_tail - advance tail of skb header * @skb: buffer to reallocate diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index a07b7dd06def..b379520f9133 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,7 +13,8 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ - inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o + inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ + metrics.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c27122f01b87..6608db23f54b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1019,47 +1019,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) static int fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) { - bool ecn_ca = false; - struct nlattr *nla; - int remaining; - - if (!cfg->fc_mx) - return 0; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - u32 val; - - if (!type) - continue; - if (type > RTAX_MAX) - return -EINVAL; - - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; - - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); - if (val == TCP_CA_UNSPEC) - return -EINVAL; - } else { - val = nla_get_u32(nla); - } - if (type == RTAX_ADVMSS && val > 65535 - 40) - val = 65535 - 40; - if (type == RTAX_MTU && val > 65535 - 15) - val = 65535 - 15; - if (type == RTAX_HOPLIMIT && val > 255) - val = 255; - if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) - return -EINVAL; - fi->fib_metrics->metrics[type - 1] = val; - } - - if (ecn_ca) - fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; - - return 0; + return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len, + fi->fib_metrics->metrics); } struct fib_info *fib_create_info(struct fib_config *cfg, diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c new file mode 100644 index 000000000000..5121c6475e6b --- /dev/null +++ b/net/ipv4/metrics.c @@ -0,0 +1,53 @@ +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/types.h> +#include <net/ip.h> +#include <net/net_namespace.h> +#include <net/tcp.h> + +int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, + u32 *metrics) +{ + bool ecn_ca = false; + struct nlattr *nla; + int remaining; + + if (!fc_mx) + return 0; + + nla_for_each_attr(nla, fc_mx, fc_mx_len, remaining) { + int type = nla_type(nla); + u32 val; + + if (!type) + continue; + if (type > RTAX_MAX) + return -EINVAL; + + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); + if (val == TCP_CA_UNSPEC) + return -EINVAL; + } else { + val = nla_get_u32(nla); + } + if (type == RTAX_ADVMSS && val > 65535 - 40) + val = 65535 - 40; + if (type == RTAX_MTU && val > 65535 - 15) + val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; + if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) + return -EINVAL; + metrics[type - 1] = val; + } + + if (ecn_ca) + metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_metrics_convert); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a058de677e94..261b71d0ccc5 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -296,6 +296,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL), SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS), + SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED), + SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 438fbca96cd3..4022073b0aee 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2559,6 +2559,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered_ce = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); @@ -3166,6 +3167,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) rate64 = tcp_compute_delivery_rate(tp); if (rate64) info->tcpi_delivery_rate = rate64; + info->tcpi_delivered = tp->delivered; + info->tcpi_delivered_ce = tp->delivered_ce; unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -3179,7 +3182,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) u32 rate; stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + - 5 * nla_total_size(sizeof(u32)) + + 7 * nla_total_size(sizeof(u32)) + 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC); if (!stats) return NULL; @@ -3210,9 +3213,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); + nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); + nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); + return stats; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f93687f97d80..0396fb919b5d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3496,6 +3496,22 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit) tcp_xmit_retransmit_queue(sk); } +/* Returns the number of packets newly acked or sacked by the current ACK */ +static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag) +{ + const struct net *net = sock_net(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 delivered; + + delivered = tp->delivered - prior_delivered; + NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered); + if (flag & FLAG_ECE) { + tp->delivered_ce += delivered; + NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered); + } + return delivered; +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3619,7 +3635,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); - delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ + delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); @@ -3629,9 +3645,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ - if (flag & FLAG_DSACKING_ACK) + if (flag & FLAG_DSACKING_ACK) { tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, &rexmit); + tcp_newly_delivered(sk, delivered, flag); + } /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3655,6 +3673,7 @@ old_ack: &sack_state); tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, &rexmit); + tcp_newly_delivered(sk, delivered, flag); tcp_xmit_recovery(sk, rexmit); } @@ -5567,9 +5586,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, return true; } tp->syn_data_acked = tp->syn_data; - if (tp->syn_data_acked) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVE); + if (tp->syn_data_acked) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + /* SYN-data is counted as two separate packets in tcp_ack() */ + if (tp->delivered > 1) + --tp->delivered; + } tcp_fastopen_add_skb(sk, synack); @@ -5901,6 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } switch (sk->sk_state) { case TCP_SYN_RECV: + tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */ if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b2c0175125db..7b4d7bbf2c17 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -170,7 +170,7 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); -static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, +static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, u32 flags, u32 noflags); @@ -916,7 +916,6 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warn("Freeing alive inet6 address %p\n", ifp); return; } - ip6_rt_put(ifp->rt); kfree_rcu(ifp, rcu); } @@ -995,7 +994,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC; struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; - struct rt6_info *rt = NULL; + struct fib6_info *f6i = NULL; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -1037,16 +1036,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } - rt = addrconf_dst_alloc(idev, addr, false); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - rt = NULL; + f6i = addrconf_f6i_alloc(net, idev, addr, false, gfp_flags); + if (IS_ERR(f6i)) { + err = PTR_ERR(f6i); + f6i = NULL; goto out; } if (net->ipv6.devconf_all->disable_policy || idev->cnf.disable_policy) - rt->dst.flags |= DST_NOPOLICY; + f6i->dst_nopolicy = true; neigh_parms_data_state_setall(idev->nd_parms); @@ -1068,7 +1067,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->cstamp = ifa->tstamp = jiffies; ifa->tokenized = false; - ifa->rt = rt; + ifa->rt = f6i; ifa->idev = idev; in6_dev_hold(idev); @@ -1102,8 +1101,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, inet6addr_notifier_call_chain(NETDEV_UP, ifa); out: if (unlikely(err < 0)) { - if (rt) - ip6_rt_put(rt); + fib6_info_release(f6i); + if (ifa) { if (ifa->idev) in6_dev_put(ifa->idev); @@ -1179,19 +1178,19 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) static void cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt) { - struct rt6_info *rt; + struct fib6_info *f6i; - rt = addrconf_get_prefix_route(&ifp->addr, + f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, 0, RTF_GATEWAY | RTF_DEFAULT); - if (rt) { + if (f6i) { if (del_rt) - ip6_del_rt(rt); + ip6_del_rt(dev_net(ifp->idev->dev), f6i); else { - if (!(rt->rt6i_flags & RTF_EXPIRES)) - rt6_set_expires(rt, expires); - ip6_rt_put(rt); + if (!(f6i->fib6_flags & RTF_EXPIRES)) + fib6_set_expires(f6i, expires); + fib6_info_release(f6i); } } } @@ -2320,7 +2319,7 @@ static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad static void addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, - unsigned long expires, u32 flags) + unsigned long expires, u32 flags, gfp_t gfp_flags) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX, @@ -2331,6 +2330,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_flags = RTF_UP | flags, .fc_nlinfo.nl_net = dev_net(dev), .fc_protocol = RTPROT_KERNEL, + .fc_type = RTN_UNICAST, }; cfg.fc_dst = *pfx; @@ -2344,17 +2344,17 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, cfg.fc_flags |= RTF_NONEXTHOP; #endif - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, gfp_flags, NULL); } -static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, +static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, u32 flags, u32 noflags) { struct fib6_node *fn; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct fib6_table *table; u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; @@ -2368,14 +2368,13 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->dst.dev->ifindex != dev->ifindex) + if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex) continue; - if ((rt->rt6i_flags & flags) != flags) + if ((rt->fib6_flags & flags) != flags) continue; - if ((rt->rt6i_flags & noflags) != 0) + if ((rt->fib6_flags & noflags) != 0) continue; - if (!dst_hold_safe(&rt->dst)) - rt = NULL; + fib6_info_hold(rt); break; } out: @@ -2394,12 +2393,13 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, + .fc_type = RTN_UNICAST, .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, GFP_ATOMIC, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) @@ -2641,7 +2641,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) */ if (pinfo->onlink) { - struct rt6_info *rt; + struct fib6_info *rt; unsigned long rt_expires; /* Avoid arithmetic overflow. Really, we could @@ -2666,13 +2666,13 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (rt) { /* Autoconf prefix route */ if (valid_lft == 0) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ - rt6_set_expires(rt, jiffies + rt_expires); + fib6_set_expires(rt, jiffies + rt_expires); } else { - rt6_clean_expires(rt); + fib6_clean_expires(rt); } } else if (valid_lft) { clock_t expires = 0; @@ -2683,9 +2683,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) expires = jiffies_to_clock_t(rt_expires); } addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, expires, flags); + dev, expires, flags, GFP_ATOMIC); } - ip6_rt_put(rt); + fib6_info_release(rt); } /* Try to figure out our local address for this prefix */ @@ -2898,7 +2898,7 @@ static int inet6_addr_add(struct net *net, int ifindex, if (!IS_ERR(ifp)) { if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - expires, flags); + expires, flags, GFP_KERNEL); } /* Send a netlink notification if DAD is enabled and @@ -3051,7 +3051,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) if (addr.s6_addr32[3]) { add_addr(idev, &addr, plen, scope); - addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags); + addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags, + GFP_ATOMIC); return; } @@ -3076,7 +3077,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) add_addr(idev, &addr, plen, flag); addrconf_prefix_route(&addr, plen, idev->dev, 0, - pflags); + pflags, GFP_ATOMIC); } } } @@ -3116,7 +3117,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL); if (!IS_ERR(ifp)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, + 0, 0, GFP_ATOMIC); addrconf_dad_start(ifp); in6_ifa_put(ifp); } @@ -3231,7 +3233,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) addrconf_add_linklocal(idev, &addr, IFA_F_STABLE_PRIVACY); else if (prefix_route) - addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + addrconf_prefix_route(&addr, 64, idev->dev, + 0, 0, GFP_KERNEL); break; case IN6_ADDR_GEN_MODE_EUI64: /* addrconf_add_linklocal also adds a prefix_route and we @@ -3241,7 +3244,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) - addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + addrconf_prefix_route(&addr, 64, idev->dev, + 0, 0, GFP_KERNEL); break; case IN6_ADDR_GEN_MODE_NONE: default: @@ -3333,32 +3337,34 @@ static void addrconf_gre_config(struct net_device *dev) } #endif -static int fixup_permanent_addr(struct inet6_dev *idev, +static int fixup_permanent_addr(struct net *net, + struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - /* !rt6i_node means the host route was removed from the + /* !fib6_node means the host route was removed from the * FIB, for example, if 'lo' device is taken down. In that * case regenerate the host route. */ - if (!ifp->rt || !ifp->rt->rt6i_node) { - struct rt6_info *rt, *prev; + if (!ifp->rt || !ifp->rt->fib6_node) { + struct fib6_info *f6i, *prev; - rt = addrconf_dst_alloc(idev, &ifp->addr, false); - if (IS_ERR(rt)) - return PTR_ERR(rt); + f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false, + GFP_ATOMIC); + if (IS_ERR(f6i)) + return PTR_ERR(f6i); /* ifp->rt can be accessed outside of rtnl */ spin_lock(&ifp->lock); prev = ifp->rt; - ifp->rt = rt; + ifp->rt = f6i; spin_unlock(&ifp->lock); - ip6_rt_put(prev); + fib6_info_release(prev); } if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, - idev->dev, 0, 0); + idev->dev, 0, 0, GFP_ATOMIC); } if (ifp->state == INET6_IFADDR_STATE_PREDAD) @@ -3367,7 +3373,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev, return 0; } -static void addrconf_permanent_addr(struct net_device *dev) +static void addrconf_permanent_addr(struct net *net, struct net_device *dev) { struct inet6_ifaddr *ifp, *tmp; struct inet6_dev *idev; @@ -3380,7 +3386,7 @@ static void addrconf_permanent_addr(struct net_device *dev) list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { if ((ifp->flags & IFA_F_PERMANENT) && - fixup_permanent_addr(idev, ifp) < 0) { + fixup_permanent_addr(net, idev, ifp) < 0) { write_unlock_bh(&idev->lock); in6_ifa_hold(ifp); ipv6_del_addr(ifp); @@ -3449,7 +3455,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (event == NETDEV_UP) { /* restore routes for permanent addresses */ - addrconf_permanent_addr(dev); + addrconf_permanent_addr(net, dev); if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ @@ -3707,7 +3713,7 @@ restart: keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; bool keep; addrconf_del_dad_work(ifa); @@ -3735,7 +3741,7 @@ restart: spin_unlock_bh(&ifa->lock); if (rt) - ip6_del_rt(rt); + ip6_del_rt(net, rt); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); @@ -3853,6 +3859,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; bool bump_id, notify = false; + struct net *net; addrconf_join_solict(dev, &ifp->addr); @@ -3863,8 +3870,9 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) if (ifp->state == INET6_IFADDR_STATE_DEAD) goto out; + net = dev_net(dev); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 && + (net->ipv6.devconf_all->accept_dad < 1 && idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { @@ -3900,8 +3908,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) * Frames right away */ if (ifp->flags & IFA_F_OPTIMISTIC) { - ip6_ins_rt(ifp->rt); - if (ipv6_use_optimistic_addr(dev_net(dev), idev)) { + ip6_ins_rt(net, ifp->rt); + if (ipv6_use_optimistic_addr(net, idev)) { /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ @@ -4567,8 +4575,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, ipv6_ifa_notify(0, ifp); if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - expires, flags); + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, + ifp->idev->dev, expires, flags, + GFP_KERNEL); } else if (had_prefixroute) { enum cleanup_prefix_rt_t action; unsigned long rt_expires; @@ -4808,9 +4817,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, u32 portid, u32 seq, int event, unsigned int flags) { + struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt); + int ifindex = dev ? dev->ifindex : 1; struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; - int ifindex = ifaca->aca_idev->dev->ifindex; if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; @@ -5603,29 +5613,30 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) * our DAD process, so we don't need * to do it again */ - if (!rcu_access_pointer(ifp->rt->rt6i_node)) - ip6_ins_rt(ifp->rt); + if (!rcu_access_pointer(ifp->rt->fib6_node)) + ip6_ins_rt(net, ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) addrconf_prefix_route(&ifp->peer_addr, 128, - ifp->idev->dev, 0, 0); + ifp->idev->dev, 0, 0, + GFP_ATOMIC); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); if (!ipv6_addr_any(&ifp->peer_addr)) { - struct rt6_info *rt; + struct fib6_info *rt; rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, ifp->idev->dev, 0, 0); if (rt) - ip6_del_rt(rt); + ip6_del_rt(net, rt); } if (ifp->rt) { - if (dst_hold_safe(&ifp->rt->dst)) - ip6_del_rt(ifp->rt); + ip6_del_rt(net, ifp->rt); + ifp->rt = NULL; } rt_genid_bump_ipv6(net); break; @@ -5972,11 +5983,11 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) list_for_each_entry(ifa, &idev->addr_list, if_list) { spin_lock(&ifa->lock); if (ifa->rt) { - struct rt6_info *rt = ifa->rt; + struct fib6_info *rt = ifa->rt; int cpu; rcu_read_lock(); - addrconf_set_nopolicy(ifa->rt, val); + ifa->rt->dst_nopolicy = val ? true : false; if (rt->rt6i_pcpu) { for_each_possible_cpu(cpu) { struct rt6_info **rtp; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index bbcabbba9bd8..0250d199e527 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -212,16 +212,14 @@ static void aca_get(struct ifacaddr6 *aca) static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) { - in6_dev_put(ac->aca_idev); - dst_release(&ac->aca_rt->dst); + fib6_info_release(ac->aca_rt); kfree(ac); } } -static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, +static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, const struct in6_addr *addr) { - struct inet6_dev *idev = rt->rt6i_idev; struct ifacaddr6 *aca; aca = kzalloc(sizeof(*aca), GFP_ATOMIC); @@ -229,9 +227,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, return NULL; aca->aca_addr = *addr; - in6_dev_hold(idev); - aca->aca_idev = idev; - aca->aca_rt = rt; + fib6_info_hold(f6i); + aca->aca_rt = f6i; aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; @@ -246,7 +243,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca; - struct rt6_info *rt; + struct fib6_info *f6i; + struct net *net; int err; ASSERT_RTNL(); @@ -265,14 +263,15 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) } } - rt = addrconf_dst_alloc(idev, addr, true); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); + net = dev_net(idev->dev); + f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC); + if (IS_ERR(f6i)) { + err = PTR_ERR(f6i); goto out; } - aca = aca_alloc(rt, addr); + aca = aca_alloc(f6i, addr); if (!aca) { - ip6_rt_put(rt); + fib6_info_release(f6i); err = -ENOMEM; goto out; } @@ -286,7 +285,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) aca_get(aca); write_unlock_bh(&idev->lock); - ip6_ins_rt(rt); + ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); @@ -328,8 +327,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->dst); - ip6_del_rt(aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt); aca_put(aca); return 0; @@ -356,8 +354,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->dst); - ip6_del_rt(aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt); aca_put(aca); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index deab2db6692e..f936e91a8c65 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -43,7 +43,7 @@ static struct kmem_cache *fib6_node_kmem __read_mostly; struct fib6_cleaner { struct fib6_walker w; struct net *net; - int (*func)(struct rt6_info *, void *arg); + int (*func)(struct fib6_info *, void *arg); int sernum; void *arg; }; @@ -54,7 +54,7 @@ struct fib6_cleaner { #define FWS_INIT FWS_L #endif -static struct rt6_info *fib6_find_prefix(struct net *net, +static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, @@ -105,13 +105,12 @@ enum { FIB6_NO_SERNUM_CHANGE = 0, }; -void fib6_update_sernum(struct rt6_info *rt) +void fib6_update_sernum(struct net *net, struct fib6_info *f6i) { - struct net *net = dev_net(rt->dst.dev); struct fib6_node *fn; - fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + fn = rcu_dereference_protected(f6i->fib6_node, + lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) fn->fn_sernum = fib6_new_sernum(net); } @@ -146,6 +145,69 @@ static __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) +{ + struct fib6_info *f6i; + + f6i = kzalloc(sizeof(*f6i), gfp_flags); + if (!f6i) + return NULL; + + f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); + if (!f6i->rt6i_pcpu) { + kfree(f6i); + return NULL; + } + + INIT_LIST_HEAD(&f6i->fib6_siblings); + f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; + + atomic_inc(&f6i->fib6_ref); + + return f6i; +} + +void fib6_info_destroy(struct fib6_info *f6i) +{ + struct rt6_exception_bucket *bucket; + struct dst_metrics *m; + + WARN_ON(f6i->fib6_node); + + bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1); + if (bucket) { + f6i->rt6i_exception_bucket = NULL; + kfree(bucket); + } + + if (f6i->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } + } + + if (f6i->fib6_nh.nh_dev) + dev_put(f6i->fib6_nh.nh_dev); + + m = f6i->fib6_metrics; + if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) + kfree(m); + + kfree(f6i); +} +EXPORT_SYMBOL_GPL(fib6_info_destroy); + static struct fib6_node *node_alloc(struct net *net) { struct fib6_node *fn; @@ -176,28 +238,6 @@ static void node_free(struct net *net, struct fib6_node *fn) net->ipv6.rt6_stats->fib_nodes--; } -void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) -{ - int cpu; - - if (!non_pcpu_rt->rt6i_pcpu) - return; - - for_each_possible_cpu(cpu) { - struct rt6_info **ppcpu_rt; - struct rt6_info *pcpu_rt; - - ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); - pcpu_rt = *ppcpu_rt; - if (pcpu_rt) { - dst_dev_put(&pcpu_rt->dst); - dst_release(&pcpu_rt->dst); - *ppcpu_rt = NULL; - } - } -} -EXPORT_SYMBOL_GPL(rt6_free_pcpu); - static void fib6_free_table(struct fib6_table *table) { inetpeer_invalidate_tree(&table->tb6_peers); @@ -232,7 +272,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) if (table) { table->tb6_id = id; rcu_assign_pointer(table->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); } @@ -340,7 +380,7 @@ unsigned int fib6_tables_seq_read(struct net *net) static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, - struct rt6_info *rt) + struct fib6_info *rt) { struct fib6_entry_notifier_info info = { .rt = rt, @@ -351,7 +391,7 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, static int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, - struct rt6_info *rt, + struct fib6_info *rt, struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { @@ -359,7 +399,7 @@ static int call_fib6_entry_notifiers(struct net *net, .rt = rt, }; - rt->rt6i_table->fib_seq++; + rt->fib6_table->fib_seq++; return call_fib6_notifiers(net, event_type, &info.info); } @@ -368,16 +408,16 @@ struct fib6_dump_arg { struct notifier_block *nb; }; -static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg) +static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { - if (rt == arg->net->ipv6.ip6_null_entry) + if (rt == arg->net->ipv6.fib6_null_entry) return; call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); } static int fib6_node_dump(struct fib6_walker *w) { - struct rt6_info *rt; + struct fib6_info *rt; for_each_fib6_walker_rt(w) fib6_rt_dump(rt, w->args); @@ -426,7 +466,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb) static int fib6_dump_node(struct fib6_walker *w) { int res; - struct rt6_info *rt; + struct fib6_info *rt; for_each_fib6_walker_rt(w) { res = rt6_dump_route(rt, w->args); @@ -441,10 +481,10 @@ static int fib6_dump_node(struct fib6_walker *w) * last sibling of this route (no need to dump the * sibling routes again) */ - if (rt->rt6i_nsiblings) - rt = list_last_entry(&rt->rt6i_siblings, - struct rt6_info, - rt6i_siblings); + if (rt->fib6_nsiblings) + rt = list_last_entry(&rt->fib6_siblings, + struct fib6_info, + fib6_siblings); } w->leaf = NULL; return 0; @@ -579,6 +619,24 @@ out: return res; } +void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) +{ + if (!f6i) + return; + + if (f6i->fib6_metrics == &dst_default_metrics) { + struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC); + + if (!p) + return; + + refcount_set(&p->refcnt, 1); + f6i->fib6_metrics = p; + } + + f6i->fib6_metrics->metrics[metric - 1] = val; +} + /* * Routing Table * @@ -608,7 +666,7 @@ static struct fib6_node *fib6_add_1(struct net *net, fn = root; do { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); key = (struct rt6key *)((u8 *)leaf + offset); @@ -637,11 +695,11 @@ static struct fib6_node *fib6_add_1(struct net *net, /* clean up an intermediate node */ if (!(fn->fn_flags & RTN_RTINFO)) { RCU_INIT_POINTER(fn->leaf, NULL); - rt6_release(leaf); + fib6_info_release(leaf); /* remove null_entry in the root node */ } else if (fn->fn_flags & RTN_TL_ROOT && rcu_access_pointer(fn->leaf) == - net->ipv6.ip6_null_entry) { + net->ipv6.fib6_null_entry) { RCU_INIT_POINTER(fn->leaf, NULL); } @@ -750,7 +808,7 @@ insert_above: RCU_INIT_POINTER(in->parent, pn); in->leaf = fn->leaf; atomic_inc(&rcu_dereference_protected(in->leaf, - lockdep_is_held(&table->tb6_lock))->rt6i_ref); + lockdep_is_held(&table->tb6_lock))->fib6_ref); /* update parent pointer */ if (dir) @@ -802,44 +860,12 @@ insert_above: return ln; } -static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) -{ - int i; - - for (i = 0; i < RTAX_MAX; i++) { - if (test_bit(i, mxc->mx_valid)) - mp[i] = mxc->mx[i]; - } -} - -static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) -{ - if (!mxc->mx) - return 0; - - if (dst->flags & DST_HOST) { - u32 *mp = dst_metrics_write_ptr(dst); - - if (unlikely(!mp)) - return -ENOMEM; - - fib6_copy_metrics(mp, mxc); - } else { - dst_init_metrics(dst, mxc->mx, false); - - /* We've stolen mx now. */ - mxc->mx = NULL; - } - - return 0; -} - -static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, +static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, struct net *net) { - struct fib6_table *table = rt->rt6i_table; + struct fib6_table *table = rt->fib6_table; - if (atomic_read(&rt->rt6i_ref) != 1) { + if (atomic_read(&rt->fib6_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, * which must be released in time. So, scan ascendant nodes @@ -847,18 +873,38 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * to still alive ones. */ while (fn) { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *new_leaf; + struct fib6_info *new_leaf; if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { new_leaf = fib6_find_prefix(net, table, fn); - atomic_inc(&new_leaf->rt6i_ref); + atomic_inc(&new_leaf->fib6_ref); + rcu_assign_pointer(fn->leaf, new_leaf); - rt6_release(rt); + fib6_info_release(rt); } fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&table->tb6_lock)); } + + if (rt->rt6i_pcpu) { + int cpu; + + /* release the reference to this fib entry from + * all of its cached pcpu routes + */ + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(rt->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + fib6_info_release(pcpu_rt->from); + pcpu_rt->from = NULL; + } + } + } } } @@ -866,15 +912,15 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * Insert routing information in a node. */ -static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, +static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); - struct rt6_info *iter = NULL; - struct rt6_info __rcu **ins; - struct rt6_info __rcu **fallback_ins = NULL; + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&rt->fib6_table->tb6_lock)); + struct fib6_info *iter = NULL; + struct fib6_info __rcu **ins; + struct fib6_info __rcu **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); int add = (!info->nlh || @@ -891,12 +937,12 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, for (iter = leaf; iter; iter = rcu_dereference_protected(iter->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock))) { + lockdep_is_held(&rt->fib6_table->tb6_lock))) { /* * Search for duplicates */ - if (iter->rt6i_metric == rt->rt6i_metric) { + if (iter->fib6_metric == rt->fib6_metric) { /* * Same priority level */ @@ -916,15 +962,15 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, } if (rt6_duplicate_nexthop(iter, rt)) { - if (rt->rt6i_nsiblings) - rt->rt6i_nsiblings = 0; - if (!(iter->rt6i_flags & RTF_EXPIRES)) + if (rt->fib6_nsiblings) + rt->fib6_nsiblings = 0; + if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; - if (!(rt->rt6i_flags & RTF_EXPIRES)) - rt6_clean_expires(iter); + if (!(rt->fib6_flags & RTF_EXPIRES)) + fib6_clean_expires(iter); else - rt6_set_expires(iter, rt->dst.expires); - iter->rt6i_pmtu = rt->rt6i_pmtu; + fib6_set_expires(iter, rt->expires); + fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu); return -EEXIST; } /* If we have the same destination and the same metric, @@ -940,10 +986,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, */ if (rt_can_ecmp && rt6_qualify_for_ecmp(iter)) - rt->rt6i_nsiblings++; + rt->fib6_nsiblings++; } - if (iter->rt6i_metric > rt->rt6i_metric) + if (iter->fib6_metric > rt->fib6_metric) break; next_iter: @@ -954,7 +1000,7 @@ next_iter: /* No ECMP-able route found, replace first non-ECMP one */ ins = fallback_ins; iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); found++; } @@ -963,34 +1009,34 @@ next_iter: fn->rr_ptr = NULL; /* Link this route to others same route. */ - if (rt->rt6i_nsiblings) { - unsigned int rt6i_nsiblings; - struct rt6_info *sibling, *temp_sibling; + if (rt->fib6_nsiblings) { + unsigned int fib6_nsiblings; + struct fib6_info *sibling, *temp_sibling; /* Find the first route that have the same metric */ sibling = leaf; while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric && + if (sibling->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(sibling)) { - list_add_tail(&rt->rt6i_siblings, - &sibling->rt6i_siblings); + list_add_tail(&rt->fib6_siblings, + &sibling->fib6_siblings); break; } sibling = rcu_dereference_protected(sibling->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } /* For each sibling in the list, increment the counter of * siblings. BUG() if counters does not match, list of siblings * is broken! */ - rt6i_nsiblings = 0; + fib6_nsiblings = 0; list_for_each_entry_safe(sibling, temp_sibling, - &rt->rt6i_siblings, rt6i_siblings) { - sibling->rt6i_nsiblings++; - BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); - rt6i_nsiblings++; + &rt->fib6_siblings, fib6_siblings) { + sibling->fib6_nsiblings++; + BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings); + fib6_nsiblings++; } - BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); + BUG_ON(fib6_nsiblings != rt->fib6_nsiblings); rt6_multipath_rebalance(temp_sibling); } @@ -1003,9 +1049,6 @@ next_iter: add: nlflags |= NLM_F_CREATE; - err = fib6_commit_metrics(&rt->dst, mxc); - if (err) - return err; err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, @@ -1014,8 +1057,8 @@ add: return err; rcu_assign_pointer(rt->rt6_next, iter); - atomic_inc(&rt->rt6i_ref); - rcu_assign_pointer(rt->rt6i_node, fn); + atomic_inc(&rt->fib6_ref); + rcu_assign_pointer(rt->fib6_node, fn); rcu_assign_pointer(*ins, rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); @@ -1036,18 +1079,14 @@ add: return -ENOENT; } - err = fib6_commit_metrics(&rt->dst, mxc); - if (err) - return err; - err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt, extack); if (err) return err; - atomic_inc(&rt->rt6i_ref); - rcu_assign_pointer(rt->rt6i_node, fn); + atomic_inc(&rt->fib6_ref); + rcu_assign_pointer(rt->fib6_node, fn); rt->rt6_next = iter->rt6_next; rcu_assign_pointer(*ins, rt); if (!info->skip_notify) @@ -1056,35 +1095,35 @@ add: info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } - nsiblings = iter->rt6i_nsiblings; - iter->rt6i_node = NULL; + nsiblings = iter->fib6_nsiblings; + iter->fib6_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; - rt6_release(iter); + fib6_info_release(iter); if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ ins = &rt->rt6_next; iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); while (iter) { - if (iter->rt6i_metric > rt->rt6i_metric) + if (iter->fib6_metric > rt->fib6_metric) break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->rt6_next; - iter->rt6i_node = NULL; + iter->fib6_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; - rt6_release(iter); + fib6_info_release(iter); nsiblings--; info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { ins = &iter->rt6_next; } iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } WARN_ON(nsiblings != 0); } @@ -1093,10 +1132,10 @@ add: return 0; } -static void fib6_start_gc(struct net *net, struct rt6_info *rt) +static void fib6_start_gc(struct net *net, struct fib6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && - (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) + (rt->fib6_flags & RTF_EXPIRES)) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -1108,22 +1147,22 @@ void fib6_force_start_gc(struct net *net) jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } -static void __fib6_update_sernum_upto_root(struct rt6_info *rt, +static void __fib6_update_sernum_upto_root(struct fib6_info *rt, int sernum) { - struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock)); /* paired with smp_rmb() in rt6_get_cookie_safe() */ smp_wmb(); while (fn) { fn->fn_sernum = sernum; fn = rcu_dereference_protected(fn->parent, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } } -void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) +void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) { __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); } @@ -1135,22 +1174,16 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) * Need to own table->tb6_lock */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, - struct netlink_ext_ack *extack) +int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) { - struct fib6_table *table = rt->rt6i_table; + struct fib6_table *table = rt->fib6_table; struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; int allow_create = 1; int replace_required = 0; int sernum = fib6_new_sernum(info->nl_net); - if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) - return -EINVAL; - if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE)) - return -EINVAL; - if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) allow_create = 0; @@ -1161,8 +1194,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); fn = fib6_add_1(info->nl_net, table, root, - &rt->rt6i_dst.addr, rt->rt6i_dst.plen, - offsetof(struct rt6_info, rt6i_dst), allow_create, + &rt->fib6_dst.addr, rt->fib6_dst.plen, + offsetof(struct fib6_info, fib6_dst), allow_create, replace_required, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); @@ -1173,7 +1206,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, pn = fn; #ifdef CONFIG_IPV6_SUBTREES - if (rt->rt6i_src.plen) { + if (rt->fib6_src.plen) { struct fib6_node *sn; if (!rcu_access_pointer(fn->subtree)) { @@ -1194,16 +1227,16 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!sfn) goto failure; - atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); + atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref); rcu_assign_pointer(sfn->leaf, - info->nl_net->ipv6.ip6_null_entry); + info->nl_net->ipv6.fib6_null_entry); sfn->fn_flags = RTN_ROOT; /* Now add the first leaf node to new subtree */ sn = fib6_add_1(info->nl_net, table, sfn, - &rt->rt6i_src.addr, rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src), + &rt->fib6_src.addr, rt->fib6_src.plen, + offsetof(struct fib6_info, fib6_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { @@ -1221,8 +1254,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, rcu_assign_pointer(fn->subtree, sfn); } else { sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn), - &rt->rt6i_src.addr, rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src), + &rt->fib6_src.addr, rt->fib6_src.plen, + offsetof(struct fib6_info, fib6_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { @@ -1235,9 +1268,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (fn->fn_flags & RTN_TL_ROOT) { /* put back null_entry for root node */ rcu_assign_pointer(fn->leaf, - info->nl_net->ipv6.ip6_null_entry); + info->nl_net->ipv6.fib6_null_entry); } else { - atomic_inc(&rt->rt6i_ref); + atomic_inc(&rt->fib6_ref); rcu_assign_pointer(fn->leaf, rt); } } @@ -1245,7 +1278,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } #endif - err = fib6_add_rt2node(fn, rt, info, mxc, extack); + err = fib6_add_rt2node(fn, rt, info, extack); if (!err) { __fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); @@ -1259,13 +1292,13 @@ out: * super-tree leaf node we have to find a new one for it. */ if (pn != fn) { - struct rt6_info *pn_leaf = + struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); if (pn_leaf == rt) { pn_leaf = NULL; RCU_INIT_POINTER(pn->leaf, NULL); - atomic_dec(&rt->rt6i_ref); + fib6_info_release(rt); } if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, @@ -1274,10 +1307,10 @@ out: if (!pn_leaf) { WARN_ON(!pn_leaf); pn_leaf = - info->nl_net->ipv6.ip6_null_entry; + info->nl_net->ipv6.fib6_null_entry; } #endif - atomic_inc(&pn_leaf->rt6i_ref); + fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } } @@ -1299,10 +1332,6 @@ failure: (fn->fn_flags & RTN_TL_ROOT && !rcu_access_pointer(fn->leaf)))) fib6_repair_tree(info->nl_net, table, fn); - /* Always release dst as dst->__refcnt is guaranteed - * to be taken before entering this function - */ - dst_release_immediate(&rt->dst); return err; } @@ -1312,7 +1341,7 @@ failure: */ struct lookup_args { - int offset; /* key offset on rt6_info */ + int offset; /* key offset on fib6_info */ const struct in6_addr *addr; /* search key */ }; @@ -1350,7 +1379,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, struct fib6_node *subtree = FIB6_SUBTREE(fn); if (subtree || fn->fn_flags & RTN_RTINFO) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; if (!leaf) @@ -1390,12 +1419,12 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad struct fib6_node *fn; struct lookup_args args[] = { { - .offset = offsetof(struct rt6_info, rt6i_dst), + .offset = offsetof(struct fib6_info, fib6_dst), .addr = daddr, }, #ifdef CONFIG_IPV6_SUBTREES { - .offset = offsetof(struct rt6_info, rt6i_src), + .offset = offsetof(struct fib6_info, fib6_src), .addr = saddr, }, #endif @@ -1431,7 +1460,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; /* This node is being deleted */ @@ -1480,7 +1509,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, struct fib6_node *fn; fn = fib6_locate_1(root, daddr, dst_len, - offsetof(struct rt6_info, rt6i_dst), + offsetof(struct fib6_info, fib6_dst), exact_match); #ifdef CONFIG_IPV6_SUBTREES @@ -1491,7 +1520,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, if (subtree) { fn = fib6_locate_1(subtree, saddr, src_len, - offsetof(struct rt6_info, rt6i_src), + offsetof(struct fib6_info, fib6_src), exact_match); } } @@ -1510,14 +1539,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root, * */ -static struct rt6_info *fib6_find_prefix(struct net *net, +static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn) { struct fib6_node *child_left, *child_right; if (fn->fn_flags & RTN_ROOT) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; while (fn) { child_left = rcu_dereference_protected(fn->left, @@ -1554,7 +1583,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, /* Set fn->leaf to null_entry for root node. */ if (fn->fn_flags & RTN_TL_ROOT) { - rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry); + rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry); return fn; } @@ -1569,11 +1598,11 @@ static struct fib6_node *fib6_repair_tree(struct net *net, lockdep_is_held(&table->tb6_lock)); struct fib6_node *pn_l = rcu_dereference_protected(pn->left, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, + struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *new_fn_leaf; + struct fib6_info *new_fn_leaf; RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; @@ -1599,10 +1628,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, #if RT6_DEBUG >= 2 if (!new_fn_leaf) { WARN_ON(!new_fn_leaf); - new_fn_leaf = net->ipv6.ip6_null_entry; + new_fn_leaf = net->ipv6.fib6_null_entry; } #endif - atomic_inc(&new_fn_leaf->rt6i_ref); + fib6_info_hold(new_fn_leaf); rcu_assign_pointer(fn->leaf, new_fn_leaf); return pn; } @@ -1658,26 +1687,24 @@ static struct fib6_node *fib6_repair_tree(struct net *net, return pn; RCU_INIT_POINTER(pn->leaf, NULL); - rt6_release(pn_leaf); + fib6_info_release(pn_leaf); fn = pn; } } static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, - struct rt6_info __rcu **rtp, struct nl_info *info) + struct fib6_info __rcu **rtp, struct nl_info *info) { struct fib6_walker *w; - struct rt6_info *rt = rcu_dereference_protected(*rtp, + struct fib6_info *rt = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); struct net *net = info->nl_net; RT6_TRACE("fib6_del_route\n"); - WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); - /* Unlink it */ *rtp = rt->rt6_next; - rt->rt6i_node = NULL; + rt->fib6_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@ -1689,14 +1716,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, fn->rr_ptr = NULL; /* Remove this entry from other siblings */ - if (rt->rt6i_nsiblings) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings) { + struct fib6_info *sibling, *next_sibling; list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, rt6i_siblings) - sibling->rt6i_nsiblings--; - rt->rt6i_nsiblings = 0; - list_del_init(&rt->rt6i_siblings); + &rt->fib6_siblings, fib6_siblings) + sibling->fib6_nsiblings--; + rt->fib6_nsiblings = 0; + list_del_init(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } @@ -1730,40 +1757,30 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); - rt6_release(rt); + fib6_info_release(rt); } /* Need to own table->tb6_lock */ -int fib6_del(struct rt6_info *rt, struct nl_info *info) +int fib6_del(struct fib6_info *rt, struct nl_info *info) { - struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); - struct fib6_table *table = rt->rt6i_table; + struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock)); + struct fib6_table *table = rt->fib6_table; struct net *net = info->nl_net; - struct rt6_info __rcu **rtp; - struct rt6_info __rcu **rtp_next; + struct fib6_info __rcu **rtp; + struct fib6_info __rcu **rtp_next; -#if RT6_DEBUG >= 2 - if (rt->dst.obsolete > 0) { - WARN_ON(fn); - return -ENOENT; - } -#endif - if (!fn || rt == net->ipv6.ip6_null_entry) + if (!fn || rt == net->ipv6.fib6_null_entry) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - /* remove cached dst from exception table */ - if (rt->rt6i_flags & RTF_CACHE) - return rt6_remove_exception_rt(rt); - /* * Walk the leaf entries looking for ourself */ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) { - struct rt6_info *cur = rcu_dereference_protected(*rtp, + struct fib6_info *cur = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); if (rt == cur) { fib6_del_route(table, fn, rtp, info); @@ -1907,7 +1924,7 @@ static int fib6_walk(struct net *net, struct fib6_walker *w) static int fib6_clean_node(struct fib6_walker *w) { int res; - struct rt6_info *rt; + struct fib6_info *rt; struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); struct nl_info info = { .nl_net = c->net, @@ -1932,17 +1949,17 @@ static int fib6_clean_node(struct fib6_walker *w) #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", __func__, rt, - rcu_access_pointer(rt->rt6i_node), + rcu_access_pointer(rt->fib6_node), res); #endif continue; } return 0; } else if (res == -2) { - if (WARN_ON(!rt->rt6i_nsiblings)) + if (WARN_ON(!rt->fib6_nsiblings)) continue; - rt = list_last_entry(&rt->rt6i_siblings, - struct rt6_info, rt6i_siblings); + rt = list_last_entry(&rt->fib6_siblings, + struct fib6_info, fib6_siblings); continue; } WARN_ON(res != 0); @@ -1961,7 +1978,7 @@ static int fib6_clean_node(struct fib6_walker *w) */ static void fib6_clean_tree(struct net *net, struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), + int (*func)(struct fib6_info *, void *arg), int sernum, void *arg) { struct fib6_cleaner c; @@ -1979,7 +1996,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } static void __fib6_clean_all(struct net *net, - int (*func)(struct rt6_info *, void *), + int (*func)(struct fib6_info *, void *), int sernum, void *arg) { struct fib6_table *table; @@ -1999,7 +2016,7 @@ static void __fib6_clean_all(struct net *net, rcu_read_unlock(); } -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *), +void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), void *arg) { __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); @@ -2016,7 +2033,7 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct rt6_info *rt, void *arg) +static int fib6_age(struct fib6_info *rt, void *arg) { struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; @@ -2026,8 +2043,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) * Routes are expired even if they are in use. */ - if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { - if (time_after(now, rt->dst.expires)) { + if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { + if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; } @@ -2110,7 +2127,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); @@ -2122,7 +2139,7 @@ static int __net_init fib6_net_init(struct net *net) goto out_fib6_main_tbl; net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); @@ -2220,25 +2237,26 @@ struct ipv6_route_iter { static int ipv6_route_seq_show(struct seq_file *seq, void *v) { - struct rt6_info *rt = v; + struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; + const struct net_device *dev; - seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); + seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen); #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->rt6i_flags & RTF_GATEWAY) - seq_printf(seq, "%pi6", &rt->rt6i_gateway); + if (rt->fib6_flags & RTF_GATEWAY) + seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw); else seq_puts(seq, "00000000000000000000000000000000"); + dev = rt->fib6_nh.nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), - rt->dst.__use, rt->rt6i_flags, - rt->dst.dev ? rt->dst.dev->name : ""); + rt->fib6_metric, atomic_read(&rt->fib6_ref), 0, + rt->fib6_flags, dev ? dev->name : ""); iter->w.leaf = NULL; return 0; } @@ -2311,14 +2329,14 @@ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int r; - struct rt6_info *n; + struct fib6_info *n; struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; if (!v) goto iter_table; - n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next); + n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next); if (n) { ++*pos; return n; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a39b04f9fa6e..3db47986ef38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -968,7 +968,8 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, if (!had_dst) *dst = ip6_route_output(net, sk, fl6); rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; - err = ip6_route_get_saddr(net, rt, &fl6->daddr, + err = ip6_route_get_saddr(net, rt ? rt->from : NULL, + &fl6->daddr, sk ? inet6_sk(sk)->srcprefs : 0, &fl6->saddr); if (err) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9de4dfb126ba..9ac5366064e3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1155,7 +1155,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); struct neighbour *neigh = NULL; struct inet6_dev *in6_dev; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; + struct net *net; int lifetime; struct ndisc_options ndopts; int optlen; @@ -1253,9 +1254,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) /* Do not accept RA with source-addr found on local machine unless * accept_ra_from_local is set to true. */ + net = dev_net(in6_dev->dev); if (!in6_dev->cnf.accept_ra_from_local && - ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - in6_dev->dev, 0)) { + ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", skb->dev->name); @@ -1272,20 +1273,22 @@ static void ndisc_router_discovery(struct sk_buff *skb) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif - rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); + rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { - neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); + neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, + rt->fib6_nh.nh_dev, NULL, + &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - ip6_rt_put(rt); + fib6_info_release(rt); return; } } if (rt && lifetime == 0) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } @@ -1294,7 +1297,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); - rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); + rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, + skb->dev, pref); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", @@ -1302,28 +1306,29 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); + neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, + rt->fib6_nh.nh_dev, NULL, + &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - ip6_rt_put(rt); + fib6_info_release(rt); return; } neigh->flags |= NTF_ROUTER; } else if (rt) { - rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); + rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); } if (rt) - rt6_set_expires(rt, jiffies + (HZ * lifetime)); + fib6_set_expires(rt, jiffies + (HZ * lifetime)); if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; - if (rt) - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, - ra_msg->icmph.icmp6_hop_limit); + fib6_metric_set(rt, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } else { ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } @@ -1475,10 +1480,7 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (in6_dev->cnf.mtu6 != mtu) { in6_dev->cnf.mtu6 = mtu; - - if (rt) - dst_metric_set(&rt->dst, RTAX_MTU, mtu); - + fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } } @@ -1497,7 +1499,7 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: - ip6_rt_put(rt); + fib6_info_release(rt); if (neigh) neigh_release(neigh); } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2cdf3dcf8c2c..b939b94e7e91 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -163,7 +163,8 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) } static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, - struct frag_hdr *fhdr, int nhoff) + struct frag_hdr *fhdr, int nhoff, + u32 *prob_offset) { struct sk_buff *prev, *next; struct net_device *dev; @@ -179,11 +180,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - ((u8 *)&fhdr->frag_off - - skb_network_header(skb))); + *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); return -1; } @@ -214,10 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - offsetof(struct ipv6hdr, payload_len)); + *prob_offset = offsetof(struct ipv6hdr, payload_len); return -1; } if (end > fq->q.len) { @@ -519,15 +513,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb) iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { + u32 prob_offset = 0; int ret; spin_lock(&fq->q.lock); fq->iif = iif; - ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, + &prob_offset); spin_unlock(&fq->q.lock); inet_frag_put(&fq->q); + if (prob_offset) { + __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); + } return ret; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1d738bfe893b..884d9cee790f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,7 +78,6 @@ enum rt6_nud_state { RT6_NUD_SUCCEED = 1 }; -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_mtu(const struct dst_entry *dst); @@ -97,25 +96,24 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static void rt6_dst_from_metrics_check(struct rt6_info *rt); -static int rt6_score_route(struct rt6_info *rt, int oif, int strict); -static size_t rt6_nlmsg_size(struct rt6_info *rt); -static int rt6_fill_node(struct net *net, - struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, struct in6_addr *src, +static int rt6_score_route(struct fib6_info *rt, int oif, int strict); +static size_t rt6_nlmsg_size(struct fib6_info *rt); +static int rt6_fill_node(struct net *net, struct sk_buff *skb, + struct fib6_info *rt, struct dst_entry *dst, + struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev); @@ -184,29 +182,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) } } -static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) -{ - return dst_metrics_write_ptr(&rt->from->dst); -} - -static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) -{ - struct rt6_info *rt = (struct rt6_info *)dst; - - if (rt->rt6i_flags & RTF_PCPU) - return rt6_pcpu_cow_metrics(rt); - else if (rt->rt6i_flags & RTF_CACHE) - return NULL; - else - return dst_cow_metrics_generic(dst, old); -} - -static inline const void *choose_neigh_daddr(struct rt6_info *rt, +static inline const void *choose_neigh_daddr(const struct in6_addr *p, struct sk_buff *skb, const void *daddr) { - struct in6_addr *p = &rt->rt6i_gateway; - if (!ipv6_addr_any(p)) return (const void *) p; else if (skb) @@ -214,18 +193,27 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt, return daddr; } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr) +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, + struct net_device *dev, + struct sk_buff *skb, + const void *daddr) { - struct rt6_info *rt = (struct rt6_info *) dst; struct neighbour *n; - daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(dst->dev, daddr); + daddr = choose_neigh_daddr(gw, skb, daddr); + n = __ipv6_neigh_lookup(dev, daddr); if (n) return n; - return neigh_create(&nd_tbl, daddr, dst->dev); + return neigh_create(&nd_tbl, daddr, dev); +} + +static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); + + return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) @@ -233,7 +221,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) struct net_device *dev = dst->dev; struct rt6_info *rt = (struct rt6_info *)dst; - daddr = choose_neigh_daddr(rt, NULL, daddr); + daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr); if (!daddr) return; if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -250,7 +238,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .mtu = ip6_mtu, - .cow_metrics = ipv6_cow_metrics, + .cow_metrics = dst_cow_metrics_generic, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -258,7 +246,7 @@ static struct dst_ops ip6_dst_ops_template = { .update_pmtu = ip6_rt_update_pmtu, .redirect = rt6_do_redirect, .local_out = __ip6_local_out, - .neigh_lookup = ip6_neigh_lookup, + .neigh_lookup = ip6_dst_neigh_lookup, .confirm_neigh = ip6_confirm_neigh, }; @@ -288,13 +276,22 @@ static struct dst_ops ip6_dst_blackhole_ops = { .update_pmtu = ip6_rt_blackhole_update_pmtu, .redirect = ip6_rt_blackhole_redirect, .cow_metrics = dst_cow_metrics_generic, - .neigh_lookup = ip6_neigh_lookup, + .neigh_lookup = ip6_dst_neigh_lookup, }; static const u32 ip6_template_metrics[RTAX_MAX] = { [RTAX_HOPLIMIT - 1] = 0, }; +static const struct fib6_info fib6_null_entry_template = { + .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP), + .fib6_protocol = RTPROT_KERNEL, + .fib6_metric = ~(u32)0, + .fib6_ref = ATOMIC_INIT(1), + .fib6_type = RTN_UNREACHABLE, + .fib6_metrics = (struct dst_metrics *)&dst_default_metrics, +}; + static const struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -305,9 +302,6 @@ static const struct rt6_info ip6_null_entry_template = { .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -322,9 +316,6 @@ static const struct rt6_info ip6_prohibit_entry_template = { .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; static const struct rt6_info ip6_blk_hole_entry_template = { @@ -337,9 +328,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .output = dst_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; #endif @@ -349,14 +337,12 @@ static void rt6_info_init(struct rt6_info *rt) struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - INIT_LIST_HEAD(&rt->rt6i_siblings); INIT_LIST_HEAD(&rt->rt6i_uncached); } /* allocate dst with ip6_dst_ops */ -static struct rt6_info *__ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, + int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, flags); @@ -368,34 +354,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, return rt; } - -struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) -{ - struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); - - if (rt) { - rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); - if (!rt->rt6i_pcpu) { - dst_release_immediate(&rt->dst); - return NULL; - } - } - - return rt; -} EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; - struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from = rt->from; struct inet6_dev *idev; dst_destroy_metrics_generic(dst); - free_percpu(rt->rt6i_pcpu); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; @@ -403,14 +370,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1); - if (bucket) { - rt->rt6i_exception_bucket = NULL; - kfree(bucket); - } rt->from = NULL; - dst_release(&from->dst); + fib6_info_release(from); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -445,18 +407,18 @@ static bool rt6_check_expired(const struct rt6_info *rt) return true; } else if (rt->from) { return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || - rt6_check_expired(rt->from); + fib6_check_expired(rt->from); } return false; } -static struct rt6_info *rt6_multipath_select(const struct net *net, - struct rt6_info *match, +static struct fib6_info *rt6_multipath_select(const struct net *net, + struct fib6_info *match, struct flowi6 *fl6, int oif, const struct sk_buff *skb, int strict) { - struct rt6_info *sibling, *next_sibling; + struct fib6_info *sibling, *next_sibling; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. @@ -464,12 +426,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); - if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) + if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound)) return match; - list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, - rt6i_siblings) { - if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) + list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, + fib6_siblings) { + int nh_upper_bound; + + nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); + if (fl6->mp_hash > nh_upper_bound) continue; if (rt6_score_route(sibling, oif, strict) < 0) break; @@ -484,38 +449,27 @@ static struct rt6_info *rt6_multipath_select(const struct net *net, * Route lookup. rcu_read_lock() should be held. */ -static inline struct rt6_info *rt6_device_match(struct net *net, - struct rt6_info *rt, +static inline struct fib6_info *rt6_device_match(struct net *net, + struct fib6_info *rt, const struct in6_addr *saddr, int oif, int flags) { - struct rt6_info *local = NULL; - struct rt6_info *sprt; + struct fib6_info *sprt; - if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) + if (!oif && ipv6_addr_any(saddr) && + !(rt->fib6_nh.nh_flags & RTNH_F_DEAD)) return rt; for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { - struct net_device *dev = sprt->dst.dev; + const struct net_device *dev = sprt->fib6_nh.nh_dev; - if (sprt->rt6i_nh_flags & RTNH_F_DEAD) + if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD) continue; if (oif) { if (dev->ifindex == oif) return sprt; - if (dev->flags & IFF_LOOPBACK) { - if (!sprt->rt6i_idev || - sprt->rt6i_idev->dev->ifindex != oif) { - if (flags & RT6_LOOKUP_F_IFACE) - continue; - if (local && - local->rt6i_idev->dev->ifindex == oif) - continue; - } - local = sprt; - } } else { if (ipv6_chk_addr(net, saddr, dev, flags & RT6_LOOKUP_F_IFACE)) @@ -523,15 +477,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net, } } - if (oif) { - if (local) - return local; - - if (flags & RT6_LOOKUP_F_IFACE) - return net->ipv6.ip6_null_entry; - } + if (oif && flags & RT6_LOOKUP_F_IFACE) + return net->ipv6.fib6_null_entry; - return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt; + return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -553,10 +502,13 @@ static void rt6_probe_deferred(struct work_struct *w) kfree(work); } -static void rt6_probe(struct rt6_info *rt) +static void rt6_probe(struct fib6_info *rt) { struct __rt6_probe_work *work; + const struct in6_addr *nh_gw; struct neighbour *neigh; + struct net_device *dev; + /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -565,20 +517,25 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) + if (!rt || !(rt->fib6_flags & RTF_GATEWAY)) return; + + nh_gw = &rt->fib6_nh.nh_gw; + dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { + struct inet6_dev *idev; + if (neigh->nud_state & NUD_VALID) goto out; + idev = __in6_dev_get(dev); work = NULL; write_lock(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, - neigh->updated + - rt->rt6i_idev->cnf.rtr_probe_interval)) { + neigh->updated + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) __neigh_set_probe_once(neigh); @@ -590,9 +547,9 @@ static void rt6_probe(struct rt6_info *rt) if (work) { INIT_WORK(&work->work, rt6_probe_deferred); - work->target = rt->rt6i_gateway; - dev_hold(rt->dst.dev); - work->dev = rt->dst.dev; + work->target = *nh_gw; + dev_hold(dev); + work->dev = dev; schedule_work(&work->work); } @@ -600,7 +557,7 @@ out: rcu_read_unlock_bh(); } #else -static inline void rt6_probe(struct rt6_info *rt) +static inline void rt6_probe(struct fib6_info *rt) { } #endif @@ -608,28 +565,27 @@ static inline void rt6_probe(struct rt6_info *rt) /* * Default Router Selection (RFC 2461 6.3.6) */ -static inline int rt6_check_dev(struct rt6_info *rt, int oif) +static inline int rt6_check_dev(struct fib6_info *rt, int oif) { - struct net_device *dev = rt->dst.dev; + const struct net_device *dev = rt->fib6_nh.nh_dev; + if (!oif || dev->ifindex == oif) return 2; - if ((dev->flags & IFF_LOOPBACK) && - rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) - return 1; return 0; } -static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) { - struct neighbour *neigh; enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; + struct neighbour *neigh; - if (rt->rt6i_flags & RTF_NONEXTHOP || - !(rt->rt6i_flags & RTF_GATEWAY)) + if (rt->fib6_flags & RTF_NONEXTHOP || + !(rt->fib6_flags & RTF_GATEWAY)) return RT6_NUD_SUCCEED; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, + &rt->fib6_nh.nh_gw); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) @@ -650,8 +606,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) return ret; } -static int rt6_score_route(struct rt6_info *rt, int oif, - int strict) +static int rt6_score_route(struct fib6_info *rt, int oif, int strict) { int m; @@ -659,7 +614,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, if (!m && (strict & RT6_LOOKUP_F_IFACE)) return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF - m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; + m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2; #endif if (strict & RT6_LOOKUP_F_REACHABLE) { int n = rt6_check_neigh(rt); @@ -669,23 +624,37 @@ static int rt6_score_route(struct rt6_info *rt, int oif, return m; } -static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match, +/* called with rc_read_lock held */ +static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i) +{ + const struct net_device *dev = fib6_info_nh_dev(f6i); + bool rc = false; + + if (dev) { + const struct inet6_dev *idev = __in6_dev_get(dev); + + rc = !!idev->cnf.ignore_routes_with_linkdown; + } + + return rc; +} + +static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, + int *mpri, struct fib6_info *match, bool *do_rr) { int m; bool match_do_rr = false; - struct inet6_dev *idev = rt->rt6i_idev; - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) goto out; - if (idev->cnf.ignore_routes_with_linkdown && - rt->rt6i_nh_flags & RTNH_F_LINKDOWN && + if (fib6_ignore_linkdown(rt) && + rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; - if (rt6_check_expired(rt)) + if (fib6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); @@ -709,19 +678,19 @@ out: return match; } -static struct rt6_info *find_rr_leaf(struct fib6_node *fn, - struct rt6_info *leaf, - struct rt6_info *rr_head, +static struct fib6_info *find_rr_leaf(struct fib6_node *fn, + struct fib6_info *leaf, + struct fib6_info *rr_head, u32 metric, int oif, int strict, bool *do_rr) { - struct rt6_info *rt, *match, *cont; + struct fib6_info *rt, *match, *cont; int mpri = -1; match = NULL; cont = NULL; for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) { - if (rt->rt6i_metric != metric) { + if (rt->fib6_metric != metric) { cont = rt; break; } @@ -731,7 +700,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, for (rt = leaf; rt && rt != rr_head; rt = rcu_dereference(rt->rt6_next)) { - if (rt->rt6i_metric != metric) { + if (rt->fib6_metric != metric) { cont = rt; break; } @@ -748,16 +717,16 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, return match; } -static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, +static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, int oif, int strict) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); - struct rt6_info *match, *rt0; + struct fib6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *match, *rt0; bool do_rr = false; int key_plen; - if (!leaf || leaf == net->ipv6.ip6_null_entry) - return net->ipv6.ip6_null_entry; + if (!leaf || leaf == net->ipv6.fib6_null_entry) + return net->ipv6.fib6_null_entry; rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) @@ -768,39 +737,39 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, * (This might happen if all routes under fn are deleted from * the tree and fib6_repair_tree() is called on the node.) */ - key_plen = rt0->rt6i_dst.plen; + key_plen = rt0->fib6_dst.plen; #ifdef CONFIG_IPV6_SUBTREES - if (rt0->rt6i_src.plen) - key_plen = rt0->rt6i_src.plen; + if (rt0->fib6_src.plen) + key_plen = rt0->fib6_src.plen; #endif if (fn->fn_bit != key_plen) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; - match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, + match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict, &do_rr); if (do_rr) { - struct rt6_info *next = rcu_dereference(rt0->rt6_next); + struct fib6_info *next = rcu_dereference(rt0->rt6_next); /* no entries matched; do round-robin */ - if (!next || next->rt6i_metric != rt0->rt6i_metric) + if (!next || next->fib6_metric != rt0->fib6_metric) next = leaf; if (next != rt0) { - spin_lock_bh(&leaf->rt6i_table->tb6_lock); + spin_lock_bh(&leaf->fib6_table->tb6_lock); /* make sure next is not being deleted from the tree */ - if (next->rt6i_node) + if (next->fib6_node) rcu_assign_pointer(fn->rr_ptr, next); - spin_unlock_bh(&leaf->rt6i_table->tb6_lock); + spin_unlock_bh(&leaf->fib6_table->tb6_lock); } } - return match ? match : net->ipv6.ip6_null_entry; + return match ? match : net->ipv6.fib6_null_entry; } -static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) +static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) { - return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); + return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -812,7 +781,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr prefix_buf, *prefix; unsigned int pref; unsigned long lifetime; - struct rt6_info *rt; + struct fib6_info *rt; if (len < sizeof(struct route_info)) { return -EINVAL; @@ -850,13 +819,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (rinfo->prefix_len == 0) - rt = rt6_get_dflt_router(gwaddr, dev); + rt = rt6_get_dflt_router(net, gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev); if (rt && !lifetime) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } @@ -864,21 +833,162 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev, pref); else if (rt) - rt->rt6i_flags = RTF_ROUTEINFO | - (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); + rt->fib6_flags = RTF_ROUTEINFO | + (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { if (!addrconf_finite_timeout(lifetime)) - rt6_clean_expires(rt); + fib6_clean_expires(rt); else - rt6_set_expires(rt, jiffies + HZ * lifetime); + fib6_set_expires(rt, jiffies + HZ * lifetime); - ip6_rt_put(rt); + fib6_info_release(rt); } return 0; } #endif +/* + * Misc support functions + */ + +/* called with rcu_lock held */ +static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) +{ + struct net_device *dev = rt->fib6_nh.nh_dev; + + if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { + /* for copies of local routes, dst->dev needs to be the + * device if it is a master device, the master device if + * device is enslaved, and the loopback as the default + */ + if (netif_is_l3_slave(dev) && + !rt6_need_strict(&rt->fib6_dst.addr)) + dev = l3mdev_master_dev_rcu(dev); + else if (!netif_is_l3_master(dev)) + dev = dev_net(dev)->loopback_dev; + /* last case is netif_is_l3_master(dev) is true in which + * case we want dev returned to be dev + */ + } + + return dev; +} + +static const int fib6_prop[RTN_MAX + 1] = { + [RTN_UNSPEC] = 0, + [RTN_UNICAST] = 0, + [RTN_LOCAL] = 0, + [RTN_BROADCAST] = 0, + [RTN_ANYCAST] = 0, + [RTN_MULTICAST] = 0, + [RTN_BLACKHOLE] = -EINVAL, + [RTN_UNREACHABLE] = -EHOSTUNREACH, + [RTN_PROHIBIT] = -EACCES, + [RTN_THROW] = -EAGAIN, + [RTN_NAT] = -EINVAL, + [RTN_XRESOLVE] = -EINVAL, +}; + +static int ip6_rt_type_to_error(u8 fib6_type) +{ + return fib6_prop[fib6_type]; +} + +static unsigned short fib6_info_dst_flags(struct fib6_info *rt) +{ + unsigned short flags = 0; + + if (rt->dst_nocount) + flags |= DST_NOCOUNT; + if (rt->dst_nopolicy) + flags |= DST_NOPOLICY; + if (rt->dst_host) + flags |= DST_HOST; + + return flags; +} + +static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) +{ + rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); + + switch (ort->fib6_type) { + case RTN_BLACKHOLE: + rt->dst.output = dst_discard_out; + rt->dst.input = dst_discard; + break; + case RTN_PROHIBIT: + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; + break; + case RTN_THROW: + case RTN_UNREACHABLE: + default: + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; + break; + } +} + +static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) +{ + rt->dst.flags |= fib6_info_dst_flags(ort); + + if (ort->fib6_flags & RTF_REJECT) { + ip6_rt_init_dst_reject(rt, ort); + return; + } + + rt->dst.error = 0; + rt->dst.output = ip6_output; + + if (ort->fib6_type == RTN_LOCAL) { + rt->dst.input = ip6_input; + } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { + rt->dst.input = ip6_mc_input; + } else { + rt->dst.input = ip6_forward; + } + + if (ort->fib6_nh.nh_lwtstate) { + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); + lwtunnel_set_redirect(&rt->dst); + } + + rt->dst.lastuse = jiffies; +} + +static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) +{ + rt->rt6i_flags &= ~RTF_EXPIRES; + fib6_info_hold(from); + rt->from = from; + dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); + if (from->fib6_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + refcount_inc(&from->fib6_metrics->refcnt); + } +} + +static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) +{ + struct net_device *dev = fib6_info_nh_dev(ort); + + ip6_rt_init_dst(rt, ort); + + rt->rt6i_dst = ort->fib6_dst; + rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; + rt->rt6i_gateway = ort->fib6_nh.nh_gw; + rt->rt6i_flags = ort->fib6_flags; + rt6_set_from(rt, ort); +#ifdef CONFIG_IPV6_SUBTREES + rt->rt6i_src = ort->fib6_src; +#endif + rt->rt6i_prefsrc = ort->fib6_prefsrc; + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); +} + static struct fib6_node* fib6_backtrack(struct fib6_node *fn, struct in6_addr *saddr) { @@ -914,14 +1024,29 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, return false; } +/* called with rcu_lock held */ +static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) +{ + unsigned short flags = fib6_info_dst_flags(rt); + struct net_device *dev = rt->fib6_nh.nh_dev; + struct rt6_info *nrt; + + nrt = ip6_dst_alloc(dev_net(dev), dev, flags); + if (nrt) + ip6_rt_copy_init(nrt, rt); + + return nrt; +} + static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { - struct rt6_info *rt, *rt_cache; + struct fib6_info *f6i; struct fib6_node *fn; + struct rt6_info *rt; if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) flags &= ~RT6_LOOKUP_F_IFACE; @@ -929,35 +1054,43 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - rt = rcu_dereference(fn->leaf); - if (!rt) { - rt = net->ipv6.ip6_null_entry; + f6i = rcu_dereference(fn->leaf); + if (!f6i) { + f6i = net->ipv6.fib6_null_entry; } else { - rt = rt6_device_match(net, rt, &fl6->saddr, + f6i = rt6_device_match(net, f6i, &fl6->saddr, fl6->flowi6_oif, flags); - if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, - skb, flags); + if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0) + f6i = rt6_multipath_select(net, f6i, fl6, + fl6->flowi6_oif, skb, flags); } - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } - /* Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - if (ip6_hold_safe(net, &rt, true)) - dst_use_noref(&rt->dst, jiffies); + /* Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { + if (ip6_hold_safe(net, &rt, true)) + dst_use_noref(&rt->dst, jiffies); + } else if (f6i == net->ipv6.fib6_null_entry) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } else { + rt = ip6_create_rt_rcu(f6i); + if (!rt) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + } rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, @@ -999,55 +1132,28 @@ EXPORT_SYMBOL(rt6_lookup); * Caller must hold dst before calling it. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct mx6_config *mxc, +static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { int err; struct fib6_table *table; - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mxc, extack); + err = fib6_add(&table->tb6_root, rt, info, extack); spin_unlock_bh(&table->tb6_lock); return err; } -int ip6_ins_rt(struct rt6_info *rt) +int ip6_ins_rt(struct net *net, struct fib6_info *rt) { - struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; - struct mx6_config mxc = { .mx = NULL, }; - - /* Hold dst to account for the reference from the fib6 tree */ - dst_hold(&rt->dst); - return __ip6_ins_rt(rt, &info, &mxc, NULL); -} - -/* called with rcu_lock held */ -static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) -{ - struct net_device *dev = rt->dst.dev; - - if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { - /* for copies of local routes, dst->dev needs to be the - * device if it is a master device, the master device if - * device is enslaved, and the loopback as the default - */ - if (netif_is_l3_slave(dev) && - !rt6_need_strict(&rt->rt6i_dst.addr)) - dev = l3mdev_master_dev_rcu(dev); - else if (!netif_is_l3_master(dev)) - dev = dev_net(dev)->loopback_dev; - /* last case is netif_is_l3_master(dev) is true in which - * case we want dev returned to be dev - */ - } + struct nl_info info = { .nl_net = net, }; - return dev; + return __ip6_ins_rt(rt, &info, NULL); } -static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, +static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, const struct in6_addr *daddr, const struct in6_addr *saddr) { @@ -1058,26 +1164,22 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, * Clone the route. */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - rcu_read_lock(); dev = ip6_rt_get_dev_rcu(ort); - rt = __ip6_dst_alloc(dev_net(dev), dev, 0); + rt = ip6_dst_alloc(dev_net(dev), dev, 0); rcu_read_unlock(); if (!rt) return NULL; ip6_rt_copy_init(rt, ort); rt->rt6i_flags |= RTF_CACHE; - rt->rt6i_metric = 0; rt->dst.flags |= DST_HOST; rt->rt6i_dst.addr = *daddr; rt->rt6i_dst.plen = 128; if (!rt6_is_gw_or_nonexthop(ort)) { - if (ort->rt6i_dst.plen != 128 && - ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) + if (ort->fib6_dst.plen != 128 && + ipv6_addr_equal(&ort->fib6_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -1090,45 +1192,44 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, return rt; } -static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) +static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt) { + unsigned short flags = fib6_info_dst_flags(rt); struct net_device *dev; struct rt6_info *pcpu_rt; rcu_read_lock(); dev = ip6_rt_get_dev_rcu(rt); - pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags); + pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); rcu_read_unlock(); if (!pcpu_rt) return NULL; ip6_rt_copy_init(pcpu_rt, rt); - pcpu_rt->rt6i_protocol = rt->rt6i_protocol; pcpu_rt->rt6i_flags |= RTF_PCPU; return pcpu_rt; } /* It should be called with rcu_read_lock() acquired */ -static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt) { struct rt6_info *pcpu_rt, **p; p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false)) - rt6_dst_from_metrics_check(pcpu_rt); + if (pcpu_rt) + ip6_hold_safe(NULL, &pcpu_rt, false); return pcpu_rt; } -static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_make_pcpu_route(struct net *net, + struct fib6_info *rt) { struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { - struct net *net = dev_net(rt->dst.dev); - dst_hold(&net->ipv6.ip6_null_entry->dst); return net->ipv6.ip6_null_entry; } @@ -1138,7 +1239,6 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) prev = cmpxchg(p, NULL, pcpu_rt); BUG_ON(prev); - rt6_dst_from_metrics_check(pcpu_rt); return pcpu_rt; } @@ -1158,9 +1258,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, return; net = dev_net(rt6_ex->rt6i->dst.dev); - rt6_ex->rt6i->rt6i_node = NULL; hlist_del_rcu(&rt6_ex->hlist); - rt6_release(rt6_ex->rt6i); + dst_release(&rt6_ex->rt6i->dst); kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; @@ -1268,20 +1367,36 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, return NULL; } +static unsigned int fib6_mtu(const struct fib6_info *rt) +{ + unsigned int mtu; + + if (rt->fib6_pmtu) { + mtu = rt->fib6_pmtu; + } else { + struct net_device *dev = fib6_info_nh_dev(rt); + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + } + + mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); + + return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); +} + static int rt6_insert_exception(struct rt6_info *nrt, - struct rt6_info *ort) + struct fib6_info *ort) { - struct net *net = dev_net(ort->dst.dev); + struct net *net = dev_net(nrt->dst.dev); struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; int err = 0; - /* ort can't be a cache or pcpu route */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); - spin_lock_bh(&rt6_exception_lock); if (ort->exception_bucket_flushed) { @@ -1308,19 +1423,19 @@ static int rt6_insert_exception(struct rt6_info *nrt, * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (ort->rt6i_src.plen) + if (ort->fib6_src.plen) src_key = &nrt->rt6i_src.addr; #endif /* Update rt6i_prefsrc as it could be changed * in rt6_remove_prefsrc() */ - nrt->rt6i_prefsrc = ort->rt6i_prefsrc; + nrt->rt6i_prefsrc = ort->fib6_prefsrc; /* rt6_mtu_change() might lower mtu on ort. * Only insert this exception route if its mtu * is less than ort's mtu value. */ - if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) { + if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) { err = -EINVAL; goto out; } @@ -1337,8 +1452,6 @@ static int rt6_insert_exception(struct rt6_info *nrt, } rt6_ex->rt6i = nrt; rt6_ex->stamp = jiffies; - atomic_inc(&nrt->rt6i_ref); - nrt->rt6i_node = ort->rt6i_node; hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; @@ -1351,16 +1464,16 @@ out: /* Update fn->fn_sernum to invalidate all cached dst */ if (!err) { - spin_lock_bh(&ort->rt6i_table->tb6_lock); - fib6_update_sernum(ort); - spin_unlock_bh(&ort->rt6i_table->tb6_lock); + spin_lock_bh(&ort->fib6_table->tb6_lock); + fib6_update_sernum(net, ort); + spin_unlock_bh(&ort->fib6_table->tb6_lock); fib6_force_start_gc(net); } return err; } -void rt6_flush_exceptions(struct rt6_info *rt) +void rt6_flush_exceptions(struct fib6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1390,7 +1503,7 @@ out: /* Find cached rt in the hash table inside passed in rt * Caller has to hold rcu_read_lock() */ -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -1408,7 +1521,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (rt->rt6i_src.plen) + if (rt->fib6_src.plen) src_key = saddr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); @@ -1420,10 +1533,10 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, } /* Remove the passed in cached rt from the hash table that contains it */ -int rt6_remove_exception_rt(struct rt6_info *rt) +static int rt6_remove_exception_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; int err; @@ -1445,7 +1558,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt) * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (from->rt6i_src.plen) + if (from->fib6_src.plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_spinlock(&bucket, @@ -1468,7 +1581,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt) static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; @@ -1486,7 +1599,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (from->rt6i_src.plen) + if (from->fib6_src.plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, @@ -1498,7 +1611,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) rcu_read_unlock(); } -static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) +static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1540,7 +1653,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, } static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, - struct rt6_info *rt, int mtu) + struct fib6_info *rt, int mtu) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1557,12 +1670,12 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, struct rt6_info *entry = rt6_ex->rt6i; /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected - * route), the metrics of its rt->dst.from have already + * route), the metrics of its rt->from have already * been updated. */ - if (entry->rt6i_pmtu && + if (dst_metric_raw(&entry->dst, RTAX_MTU) && rt6_mtu_change_route_allowed(idev, entry, mtu)) - entry->rt6i_pmtu = mtu; + dst_metric_set(&entry->dst, RTAX_MTU, mtu); } bucket++; } @@ -1570,7 +1683,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) -static void rt6_exceptions_clean_tohost(struct rt6_info *rt, +static void rt6_exceptions_clean_tohost(struct fib6_info *rt, struct in6_addr *gateway) { struct rt6_exception_bucket *bucket; @@ -1649,7 +1762,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, gc_args->more++; } -void rt6_age_exceptions(struct rt6_info *rt, +void rt6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, unsigned long now) { @@ -1685,7 +1798,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, const struct sk_buff *skb, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *rt_cache; + struct fib6_info *f6i; + struct rt6_info *rt; int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -1702,10 +1816,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, oif = 0; redo_rt6_select: - rt = rt6_select(net, fn, oif, strict); - if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); - if (rt == net->ipv6.ip6_null_entry) { + f6i = rt6_select(net, fn, oif, strict); + if (f6i->fib6_nsiblings) + f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict); + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; @@ -1717,26 +1831,25 @@ redo_rt6_select: } } - /*Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { + rt = net->ipv6.ip6_null_entry; rcu_read_unlock(); dst_hold(&rt->dst); trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } else if (rt->rt6i_flags & RTF_CACHE) { - if (ip6_hold_safe(net, &rt, true)) { + } + + /*Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { + if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); - rt6_dst_from_metrics_check(rt); - } + rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(rt->rt6i_flags & RTF_GATEWAY))) { + !(f6i->fib6_flags & RTF_GATEWAY))) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different @@ -1745,17 +1858,11 @@ redo_rt6_select: struct rt6_info *uncached_rt; - if (ip6_hold_safe(net, &rt, true)) { - dst_use_noref(&rt->dst, jiffies); - } else { - rcu_read_unlock(); - uncached_rt = rt; - goto uncached_rt_out; - } + fib6_info_hold(f6i); rcu_read_unlock(); - uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); - dst_release(&rt->dst); + uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); + fib6_info_release(f6i); if (uncached_rt) { /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() @@ -1768,7 +1875,6 @@ redo_rt6_select: dst_hold(&uncached_rt->dst); } -uncached_rt_out: trace_fib6_table_lookup(net, uncached_rt, table, fl6); return uncached_rt; @@ -1777,24 +1883,12 @@ uncached_rt_out: struct rt6_info *pcpu_rt; - dst_use_noref(&rt->dst, jiffies); local_bh_disable(); - pcpu_rt = rt6_get_pcpu_route(rt); - - if (!pcpu_rt) { - /* atomic_inc_not_zero() is needed when using rcu */ - if (atomic_inc_not_zero(&rt->rt6i_ref)) { - /* No dst_hold() on rt is needed because grabbing - * rt->rt6i_ref makes sure rt can't be released. - */ - pcpu_rt = rt6_make_pcpu_route(rt); - rt6_release(rt); - } else { - /* rt is already removed from tree */ - pcpu_rt = net->ipv6.ip6_null_entry; - dst_hold(&pcpu_rt->dst); - } - } + pcpu_rt = rt6_get_pcpu_route(f6i); + + if (!pcpu_rt) + pcpu_rt = rt6_make_pcpu_route(net, f6i); + local_bh_enable(); rcu_read_unlock(); trace_fib6_table_lookup(net, pcpu_rt, table, fl6); @@ -2015,7 +2109,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_idev = in6_dev_get(loopback_dev); rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; - rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); #ifdef CONFIG_IPV6_SUBTREES @@ -2031,18 +2124,26 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ -static void rt6_dst_from_metrics_check(struct rt6_info *rt) +static bool fib6_check(struct fib6_info *f6i, u32 cookie) { - if (rt->from && - dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst)) - dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true); + u32 rt_cookie = 0; + + if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) || + rt_cookie != cookie) + return false; + + if (fib6_check_expired(f6i)) + return false; + + return true; } static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) { u32 rt_cookie = 0; - if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) + if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) || + rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) @@ -2055,7 +2156,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) { if (!__rt6_check_expired(rt) && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && - rt6_check(rt->from, cookie)) + fib6_check(rt->from, cookie)) return &rt->dst; else return NULL; @@ -2072,8 +2173,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) * into this function always. */ - rt6_dst_from_metrics_check(rt); - if (rt->rt6i_flags & RTF_PCPU || (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) return rt6_dst_from_check(rt, cookie); @@ -2088,7 +2187,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { if (rt6_check_expired(rt)) { - ip6_del_rt(rt); + rt6_remove_exception_rt(rt); dst = NULL; } } else { @@ -2109,12 +2208,12 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { if (dst_hold_safe(&rt->dst)) - ip6_del_rt(rt); - } else { + rt6_remove_exception_rt(rt); + } else if (rt->from) { struct fib6_node *fn; rcu_read_lock(); - fn = rcu_dereference(rt->rt6i_node); + fn = rcu_dereference(rt->from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) fn->fn_sernum = -1; rcu_read_unlock(); @@ -2126,16 +2225,15 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) { struct net *net = dev_net(rt->dst.dev); + dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt->rt6i_flags |= RTF_MODIFIED; - rt->rt6i_pmtu = mtu; rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); } static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || - rcu_access_pointer(rt->rt6i_node)); + (rt->rt6i_flags & RTF_PCPU || rt->from); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -2173,10 +2271,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, } else if (daddr) { struct rt6_info *nrt6; - nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); + nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); - if (rt6_insert_exception(nrt6, rt6)) + if (rt6_insert_exception(nrt6, rt6->from)) dst_release_immediate(&nrt6->dst); } } @@ -2259,7 +2357,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt, *rt_cache; + struct rt6_info *ret = NULL, *rt_cache; + struct fib6_info *rt; struct fib6_node *fn; /* Get the "current" route for this destination and @@ -2276,29 +2375,29 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) continue; - if (rt6_check_expired(rt)) + if (fib6_check_expired(rt)) continue; - if (rt->dst.error) + if (rt->fib6_flags & RTF_REJECT) break; - if (!(rt->rt6i_flags & RTF_GATEWAY)) + if (!(rt->fib6_flags & RTF_GATEWAY)) continue; - if (fl6->flowi6_oif != rt->dst.dev->ifindex) + if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex) continue; /* rt_cache's gateway might be different from its 'parent' * in the case of an ip redirect. * So we keep searching in the exception table if the gateway * is different. */ - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { + if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) { rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); if (rt_cache && ipv6_addr_equal(&rdfl->gateway, &rt_cache->rt6i_gateway)) { - rt = rt_cache; + ret = rt_cache; break; } continue; @@ -2307,25 +2406,28 @@ restart: } if (!rt) - rt = net->ipv6.ip6_null_entry; - else if (rt->dst.error) { - rt = net->ipv6.ip6_null_entry; + rt = net->ipv6.fib6_null_entry; + else if (rt->fib6_flags & RTF_REJECT) { + ret = net->ipv6.ip6_null_entry; goto out; } - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } out: - ip6_hold_safe(net, &rt, true); + if (ret) + dst_hold(&ret->dst); + else + ret = ip6_create_rt_rcu(rt); rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); - return rt; + trace_fib6_table_lookup(net, ret, table, fl6); + return ret; }; static struct dst_entry *ip6_route_redirect(struct net *net, @@ -2417,12 +2519,8 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - - if (mtu) - goto out; + unsigned int mtu; mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) @@ -2506,60 +2604,22 @@ out: return entries > rt_max_size; } -static int ip6_convert_metrics(struct mx6_config *mxc, - const struct fib6_config *cfg) +static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, + struct fib6_config *cfg) { - struct net *net = cfg->fc_nlinfo.nl_net; - bool ecn_ca = false; - struct nlattr *nla; - int remaining; - u32 *mp; + struct dst_metrics *p; if (!cfg->fc_mx) return 0; - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (unlikely(!mp)) + p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL); + if (unlikely(!p)) return -ENOMEM; - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - u32 val; - - if (!type) - continue; - if (unlikely(type > RTAX_MAX)) - goto err; - - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; + refcount_set(&p->refcnt, 1); + rt->fib6_metrics = p; - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); - if (val == TCP_CA_UNSPEC) - goto err; - } else { - val = nla_get_u32(nla); - } - if (type == RTAX_HOPLIMIT && val > 255) - val = 255; - if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) - goto err; - - mp[type - 1] = val; - __set_bit(type - 1, mxc->mx_valid); - } - - if (ecn_ca) { - __set_bit(RTAX_FEATURES - 1, mxc->mx_valid); - mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; - } - - mxc->mx = mp; - return 0; - err: - kfree(mp); - return -EINVAL; + return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics); } static struct rt6_info *ip6_nh_lookup_table(struct net *net, @@ -2745,11 +2805,12 @@ out: return err; } -static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, +static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, + gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; @@ -2768,6 +2829,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } + if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; @@ -2826,35 +2892,30 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, - (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); + err = -ENOMEM; + rt = fib6_info_alloc(gfp_flags); + if (!rt) + goto out; - if (!rt) { - err = -ENOMEM; + if (cfg->fc_flags & RTF_ADDRCONF) + rt->dst_nocount = true; + + err = ip6_convert_metrics(net, rt, cfg); + if (err < 0) goto out; - } if (cfg->fc_flags & RTF_EXPIRES) - rt6_set_expires(rt, jiffies + + fib6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); else - rt6_clean_expires(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; - rt->rt6i_protocol = cfg->fc_protocol; + rt->fib6_protocol = cfg->fc_protocol; addr_type = ipv6_addr_type(&cfg->fc_dst); - if (addr_type & IPV6_ADDR_MULTICAST) - rt->dst.input = ip6_mc_input; - else if (cfg->fc_flags & RTF_LOCAL) - rt->dst.input = ip6_input; - else - rt->dst.input = ip6_forward; - - rt->dst.output = ip6_output; - if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; @@ -2863,22 +2924,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, &lwtstate, extack); if (err) goto out; - rt->dst.lwtstate = lwtstate_get(lwtstate); - lwtunnel_set_redirect(&rt->dst); + rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate); } - ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); - rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) - rt->dst.flags |= DST_HOST; + ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); + rt->fib6_dst.plen = cfg->fc_dst_len; + if (rt->fib6_dst.plen == 128) + rt->dst_host = true; #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); - rt->rt6i_src.plen = cfg->fc_src_len; + ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); + rt->fib6_src.plen = cfg->fc_src_len; #endif - rt->rt6i_metric = cfg->fc_metric; - rt->rt6i_nh_weight = 1; + rt->fib6_metric = cfg->fc_metric; + rt->fib6_nh.nh_weight = 1; + + rt->fib6_type = cfg->fc_type; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes @@ -2901,28 +2963,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } } - rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; - switch (cfg->fc_type) { - case RTN_BLACKHOLE: - rt->dst.error = -EINVAL; - rt->dst.output = dst_discard_out; - rt->dst.input = dst_discard; - break; - case RTN_PROHIBIT: - rt->dst.error = -EACCES; - rt->dst.output = ip6_pkt_prohibit_out; - rt->dst.input = ip6_pkt_prohibit; - break; - case RTN_THROW: - case RTN_UNREACHABLE: - default: - rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN - : (cfg->fc_type == RTN_UNREACHABLE) - ? -EHOSTUNREACH : -ENETUNREACH; - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; - break; - } + rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP; goto install_route; } @@ -2931,7 +2972,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (err) goto out; - rt->rt6i_gateway = cfg->fc_gateway; + rt->fib6_nh.nh_gw = cfg->fc_gateway; } err = -ENODEV; @@ -2956,96 +2997,82 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, err = -EINVAL; goto out; } - rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; - rt->rt6i_prefsrc.plen = 128; + rt->fib6_prefsrc.addr = cfg->fc_prefsrc; + rt->fib6_prefsrc.plen = 128; } else - rt->rt6i_prefsrc.plen = 0; + rt->fib6_prefsrc.plen = 0; - rt->rt6i_flags = cfg->fc_flags; + rt->fib6_flags = cfg->fc_flags; install_route: - if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && + if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) && !netif_carrier_ok(dev)) - rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; - rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); - rt->dst.dev = dev; - rt->rt6i_idev = idev; - rt->rt6i_table = table; + rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); + rt->fib6_nh.nh_dev = dev; + rt->fib6_table = table; cfg->fc_nlinfo.nl_net = dev_net(dev); + if (idev) + in6_dev_put(idev); + return rt; out: if (dev) dev_put(dev); if (idev) in6_dev_put(idev); - if (rt) - dst_release_immediate(&rt->dst); + fib6_info_release(rt); return ERR_PTR(err); } -int ip6_route_add(struct fib6_config *cfg, +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { - struct mx6_config mxc = { .mx = NULL, }; - struct rt6_info *rt; + struct fib6_info *rt; int err; - rt = ip6_route_info_create(cfg, extack); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - rt = NULL; - goto out; - } - - err = ip6_convert_metrics(&mxc, cfg); - if (err) - goto out; - - err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack); + rt = ip6_route_info_create(cfg, gfp_flags, extack); + if (IS_ERR(rt)) + return PTR_ERR(rt); - kfree(mxc.mx); - - return err; -out: - if (rt) - dst_release_immediate(&rt->dst); + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); + fib6_info_release(rt); return err; } -static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info) { - int err; + struct net *net = info->nl_net; struct fib6_table *table; - struct net *net = dev_net(rt->dst.dev); + int err; - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.fib6_null_entry) { err = -ENOENT; goto out; } - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); spin_unlock_bh(&table->tb6_lock); out: - ip6_rt_put(rt); + fib6_info_release(rt); return err; } -int ip6_del_rt(struct rt6_info *rt) +int ip6_del_rt(struct net *net, struct fib6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; + struct nl_info info = { .nl_net = net }; + return __ip6_del_rt(rt, &info); } -static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) +static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) { struct nl_info *info = &cfg->fc_nlinfo; struct net *net = info->nl_net; @@ -3053,20 +3080,20 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) struct fib6_table *table; int err = -ENOENT; - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) goto out_put; - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); - if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { + struct fib6_info *sibling, *next_sibling; /* prefer to send a single notification with all hops */ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (skb) { u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; - if (rt6_fill_node(net, skb, rt, + if (rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, RTM_DELROUTE, info->portid, seq, 0) < 0) { kfree_skb(skb); @@ -3076,8 +3103,8 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) } list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, - rt6i_siblings) { + &rt->fib6_siblings, + fib6_siblings) { err = fib6_del(sibling, info); if (err) goto out_unlock; @@ -3088,7 +3115,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) out_unlock: spin_unlock_bh(&table->tb6_lock); out_put: - ip6_rt_put(rt); + fib6_info_release(rt); if (skb) { rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, @@ -3097,11 +3124,28 @@ out_put: return err; } +static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) +{ + int rc = -ESRCH; + + if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) + goto out; + + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + goto out; + if (dst_hold_safe(&rt->dst)) + rc = rt6_remove_exception_rt(rt); +out: + return rc; +} + static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rt6_info *rt, *rt_cache; + struct rt6_info *rt_cache; struct fib6_table *table; + struct fib6_info *rt; struct fib6_node *fn; int err = -ESRCH; @@ -3121,25 +3165,29 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { if (cfg->fc_flags & RTF_CACHE) { + int rc; + rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, &cfg->fc_src); - if (!rt_cache) - continue; - rt = rt_cache; + if (rt_cache) { + rc = ip6_del_cached_rt(rt_cache, cfg); + if (rc != -ESRCH) + return rc; + } + continue; } if (cfg->fc_ifindex && - (!rt->dst.dev || - rt->dst.dev->ifindex != cfg->fc_ifindex)) + (!rt->fib6_nh.nh_dev || + rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw)) continue; - if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) + if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) continue; - if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) + if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol) continue; - if (!dst_hold_safe(&rt->dst)) - break; + fib6_info_hold(rt); rcu_read_unlock(); /* if gateway was specified only delete the one hop */ @@ -3242,7 +3290,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)), NDISC_REDIRECT, &ndopts); - nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); + nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL); if (!nrt) goto out; @@ -3250,14 +3298,13 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; /* No need to remove rt from the exception table if rt is * a cached route because rt6_insert_exception() will * takes care of it */ - if (rt6_insert_exception(nrt, rt)) { + if (rt6_insert_exception(nrt, rt->from)) { dst_release_immediate(&nrt->dst); goto out; } @@ -3272,44 +3319,8 @@ out: neigh_release(neigh); } -/* - * Misc support functions - */ - -static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - BUG_ON(from->from); - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(&from->dst); - rt->from = from; - dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); -} - -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) -{ - rt->dst.input = ort->dst.input; - rt->dst.output = ort->dst.output; - rt->rt6i_dst = ort->rt6i_dst; - rt->dst.error = ort->dst.error; - rt->rt6i_idev = ort->rt6i_idev; - if (rt->rt6i_idev) - in6_dev_hold(rt->rt6i_idev); - rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags; - rt6_set_from(rt, ort); - rt->rt6i_metric = ort->rt6i_metric; -#ifdef CONFIG_IPV6_SUBTREES - rt->rt6i_src = ort->rt6i_src; -#endif - rt->rt6i_prefsrc = ort->rt6i_prefsrc; - rt->rt6i_table = ort->rt6i_table; - rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); -} - #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev) @@ -3317,7 +3328,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; int ifindex = dev->ifindex; struct fib6_node *fn; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct fib6_table *table; table = fib6_get_table(net, tb_id); @@ -3330,13 +3341,13 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->dst.dev->ifindex != ifindex) + if (rt->fib6_nh.nh_dev->ifindex != ifindex) continue; - if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) + if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; - if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) + if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) continue; - ip6_hold_safe(NULL, &rt, false); + fib6_info_hold(rt); break; } out: @@ -3344,7 +3355,7 @@ out: return rt; } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, @@ -3357,6 +3368,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_protocol = RTPROT_RA, + .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, @@ -3370,36 +3382,39 @@ static struct rt6_info *rt6_add_route_info(struct net *net, if (!prefixlen) cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, GFP_ATOMIC, NULL); return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) +struct fib6_info *rt6_get_dflt_router(struct net *net, + const struct in6_addr *addr, + struct net_device *dev) { u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; - struct rt6_info *rt; + struct fib6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), tb_id); + table = fib6_get_table(net, tb_id); if (!table) return NULL; rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (dev == rt->dst.dev && - ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && - ipv6_addr_equal(&rt->rt6i_gateway, addr)) + if (dev == rt->fib6_nh.nh_dev && + ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && + ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) break; } if (rt) - ip6_hold_safe(NULL, &rt, false); + fib6_info_hold(rt); rcu_read_unlock(); return rt; } -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, + const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { @@ -3410,14 +3425,15 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), .fc_protocol = RTPROT_RA, + .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = dev_net(dev), + .fc_nlinfo.nl_net = net, }; cfg.fc_gateway = *gwaddr; - if (!ip6_route_add(&cfg, NULL)) { + if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) { struct fib6_table *table; table = fib6_get_table(dev_net(dev), cfg.fc_table); @@ -3425,24 +3441,25 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; } - return rt6_get_dflt_router(gwaddr, dev); + return rt6_get_dflt_router(net, gwaddr, dev); } -static void __rt6_purge_dflt_routers(struct fib6_table *table) +static void __rt6_purge_dflt_routers(struct net *net, + struct fib6_table *table) { - struct rt6_info *rt; + struct fib6_info *rt; restart: rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && - (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - if (dst_hold_safe(&rt->dst)) { - rcu_read_unlock(); - ip6_del_rt(rt); - } else { - rcu_read_unlock(); - } + struct net_device *dev = fib6_info_nh_dev(rt); + struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL; + + if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!idev || idev->cnf.accept_ra != 2)) { + fib6_info_hold(rt); + rcu_read_unlock(); + ip6_del_rt(net, rt); goto restart; } } @@ -3463,7 +3480,7 @@ void rt6_purge_dflt_routers(struct net *net) head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) - __rt6_purge_dflt_routers(table); + __rt6_purge_dflt_routers(net, table); } } @@ -3484,6 +3501,7 @@ static void rtmsg_to_fib6_config(struct net *net, cfg->fc_dst_len = rtmsg->rtmsg_dst_len; cfg->fc_src_len = rtmsg->rtmsg_src_len; cfg->fc_flags = rtmsg->rtmsg_flags; + cfg->fc_type = rtmsg->rtmsg_type; cfg->fc_nlinfo.nl_net = net; @@ -3513,7 +3531,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&cfg, NULL); + err = ip6_route_add(&cfg, GFP_KERNEL, NULL); break; case SIOCDELRT: err = ip6_route_del(&cfg, NULL); @@ -3583,40 +3601,40 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff * Allocate a dst for local (unicast / anycast) address. */ -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, - bool anycast) +struct fib6_info *addrconf_f6i_alloc(struct net *net, + struct inet6_dev *idev, + const struct in6_addr *addr, + bool anycast, gfp_t gfp_flags) { u32 tb_id; - struct net *net = dev_net(idev->dev); struct net_device *dev = idev->dev; - struct rt6_info *rt; + struct fib6_info *f6i; - rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); - if (!rt) + f6i = fib6_info_alloc(gfp_flags); + if (!f6i) return ERR_PTR(-ENOMEM); - in6_dev_hold(idev); - - rt->dst.flags |= DST_HOST; - rt->dst.input = ip6_input; - rt->dst.output = ip6_output; - rt->rt6i_idev = idev; - - rt->rt6i_protocol = RTPROT_KERNEL; - rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; - if (anycast) - rt->rt6i_flags |= RTF_ANYCAST; - else - rt->rt6i_flags |= RTF_LOCAL; + f6i->dst_nocount = true; + f6i->dst_host = true; + f6i->fib6_protocol = RTPROT_KERNEL; + f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP; + if (anycast) { + f6i->fib6_type = RTN_ANYCAST; + f6i->fib6_flags |= RTF_ANYCAST; + } else { + f6i->fib6_type = RTN_LOCAL; + f6i->fib6_flags |= RTF_LOCAL; + } - rt->rt6i_gateway = *addr; - rt->rt6i_dst.addr = *addr; - rt->rt6i_dst.plen = 128; + f6i->fib6_nh.nh_gw = *addr; + dev_hold(dev); + f6i->fib6_nh.nh_dev = dev; + f6i->fib6_dst.addr = *addr; + f6i->fib6_dst.plen = 128; tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; - rt->rt6i_table = fib6_get_table(net, tb_id); + f6i->fib6_table = fib6_get_table(net, tb_id); - return rt; + return f6i; } /* remove deleted ip from prefsrc entries */ @@ -3626,18 +3644,18 @@ struct arg_dev_net_ip { struct in6_addr *addr; }; -static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) +static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) { struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->dst.dev == dev || !dev) && - rt != net->ipv6.ip6_null_entry && - ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { + if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && + rt != net->ipv6.fib6_null_entry && + ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); /* remove prefsrc entry */ - rt->rt6i_prefsrc.plen = 0; + rt->fib6_prefsrc.plen = 0; /* need to update cache as well */ rt6_exceptions_remove_prefsrc(rt); spin_unlock_bh(&rt6_exception_lock); @@ -3659,12 +3677,12 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) /* Remove routers and update dst entries when gateway turn into host. */ -static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +static int fib6_clean_tohost(struct fib6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; - if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && - ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && + ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) { return -1; } @@ -3690,85 +3708,85 @@ struct arg_netdev_event { }; }; -static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) +static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; struct fib6_node *fn; - fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock)); iter = rcu_dereference_protected(fn->leaf, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); while (iter) { - if (iter->rt6i_metric == rt->rt6i_metric && + if (iter->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(iter)) return iter; iter = rcu_dereference_protected(iter->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } return NULL; } -static bool rt6_is_dead(const struct rt6_info *rt) +static bool rt6_is_dead(const struct fib6_info *rt) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD || - (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && - rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || + (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && + fib6_ignore_linkdown(rt))) return true; return false; } -static int rt6_multipath_total_weight(const struct rt6_info *rt) +static int rt6_multipath_total_weight(const struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; int total = 0; if (!rt6_is_dead(rt)) - total += rt->rt6i_nh_weight; + total += rt->fib6_nh.nh_weight; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (!rt6_is_dead(iter)) - total += iter->rt6i_nh_weight; + total += iter->fib6_nh.nh_weight; } return total; } -static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) +static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) { int upper_bound = -1; if (!rt6_is_dead(rt)) { - *weight += rt->rt6i_nh_weight; + *weight += rt->fib6_nh.nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } - atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); + atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound); } -static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) +static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) { - struct rt6_info *iter; + struct fib6_info *iter; int weight = 0; rt6_upper_bound_set(rt, &weight, total); - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) rt6_upper_bound_set(iter, &weight, total); } -void rt6_multipath_rebalance(struct rt6_info *rt) +void rt6_multipath_rebalance(struct fib6_info *rt) { - struct rt6_info *first; + struct fib6_info *first; int total; /* In case the entire multipath route was marked for flushing, * then there is no need to rebalance upon the removal of every * sibling route. */ - if (!rt->rt6i_nsiblings || rt->should_flush) + if (!rt->fib6_nsiblings || rt->should_flush) return; /* During lookup routes are evaluated in order, so we need to @@ -3783,14 +3801,14 @@ void rt6_multipath_rebalance(struct rt6_info *rt) rt6_multipath_upper_bound_set(first, total); } -static int fib6_ifup(struct rt6_info *rt, void *p_arg) +static int fib6_ifup(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; - const struct net *net = dev_net(arg->dev); + struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { - rt->rt6i_nh_flags &= ~arg->nh_flags; - fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); + if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { + rt->fib6_nh.nh_flags &= ~arg->nh_flags; + fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } @@ -3812,95 +3830,96 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) fib6_clean_all(dev_net(dev), fib6_ifup, &arg); } -static bool rt6_multipath_uses_dev(const struct rt6_info *rt, +static bool rt6_multipath_uses_dev(const struct fib6_info *rt, const struct net_device *dev) { - struct rt6_info *iter; + struct fib6_info *iter; - if (rt->dst.dev == dev) + if (rt->fib6_nh.nh_dev == dev) return true; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == dev) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == dev) return true; return false; } -static void rt6_multipath_flush(struct rt6_info *rt) +static void rt6_multipath_flush(struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; rt->should_flush = 1; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) iter->should_flush = 1; } -static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, +static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, const struct net_device *down_dev) { - struct rt6_info *iter; + struct fib6_info *iter; unsigned int dead = 0; - if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_dev == down_dev || + rt->fib6_nh.nh_flags & RTNH_F_DEAD) dead++; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == down_dev || - iter->rt6i_nh_flags & RTNH_F_DEAD) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == down_dev || + iter->fib6_nh.nh_flags & RTNH_F_DEAD) dead++; return dead; } -static void rt6_multipath_nh_flags_set(struct rt6_info *rt, +static void rt6_multipath_nh_flags_set(struct fib6_info *rt, const struct net_device *dev, unsigned int nh_flags) { - struct rt6_info *iter; + struct fib6_info *iter; - if (rt->dst.dev == dev) - rt->rt6i_nh_flags |= nh_flags; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == dev) - iter->rt6i_nh_flags |= nh_flags; + if (rt->fib6_nh.nh_dev == dev) + rt->fib6_nh.nh_flags |= nh_flags; + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == dev) + iter->fib6_nh.nh_flags |= nh_flags; } /* called with write lock held for table with rt */ -static int fib6_ifdown(struct rt6_info *rt, void *p_arg) +static int fib6_ifdown(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; const struct net_device *dev = arg->dev; - const struct net *net = dev_net(dev); + struct net *net = dev_net(dev); - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) return 0; switch (arg->event) { case NETDEV_UNREGISTER: - return rt->dst.dev == dev ? -1 : 0; + return rt->fib6_nh.nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; - if (!rt->rt6i_nsiblings) - return rt->dst.dev == dev ? -1 : 0; + if (!rt->fib6_nsiblings) + return rt->fib6_nh.nh_dev == dev ? -1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count; count = rt6_multipath_dead_count(rt, dev); - if (rt->rt6i_nsiblings + 1 == count) { + if (rt->fib6_nsiblings + 1 == count) { rt6_multipath_flush(rt); return -1; } rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); - fib6_update_sernum(rt); + fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); } return -2; case NETDEV_CHANGE: - if (rt->dst.dev != dev || - rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + if (rt->fib6_nh.nh_dev != dev || + rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; - rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } @@ -3932,7 +3951,7 @@ struct rt6_mtu_change_arg { unsigned int mtu; }; -static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) +static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; @@ -3952,12 +3971,15 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->fib6_nh.nh_dev == arg->dev && + !fib6_metric_locked(rt, RTAX_MTU)) { + u32 mtu = rt->fib6_pmtu; + + if (mtu >= arg->mtu || + (mtu < arg->mtu && mtu == idev->cnf.mtu6)) + fib6_metric_set(rt, RTAX_MTU, arg->mtu); + spin_lock_bh(&rt6_exception_lock); - if (dst_metric_raw(&rt->dst, RTAX_MTU) && - rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); rt6_exceptions_update_pmtu(idev, rt, arg->mtu); spin_unlock_bh(&rt6_exception_lock); } @@ -4116,9 +4138,8 @@ errout: } struct rt6_nh { - struct rt6_info *rt6_info; + struct fib6_info *fib6_info; struct fib6_config r_cfg; - struct mx6_config mxc; struct list_head next; }; @@ -4133,23 +4154,25 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) } } -static int ip6_route_info_append(struct list_head *rt6_nh_list, - struct rt6_info *rt, struct fib6_config *r_cfg) +static int ip6_route_info_append(struct net *net, + struct list_head *rt6_nh_list, + struct fib6_info *rt, + struct fib6_config *r_cfg) { struct rt6_nh *nh; int err = -EEXIST; list_for_each_entry(nh, rt6_nh_list, next) { - /* check if rt6_info already exists */ - if (rt6_duplicate_nexthop(nh->rt6_info, rt)) + /* check if fib6_info already exists */ + if (rt6_duplicate_nexthop(nh->fib6_info, rt)) return err; } nh = kzalloc(sizeof(*nh), GFP_KERNEL); if (!nh) return -ENOMEM; - nh->rt6_info = rt; - err = ip6_convert_metrics(&nh->mxc, r_cfg); + nh->fib6_info = rt; + err = ip6_convert_metrics(net, rt, r_cfg); if (err) { kfree(nh); return err; @@ -4160,8 +4183,8 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, return 0; } -static void ip6_route_mpath_notify(struct rt6_info *rt, - struct rt6_info *rt_last, +static void ip6_route_mpath_notify(struct fib6_info *rt, + struct fib6_info *rt_last, struct nl_info *info, __u16 nlflags) { @@ -4171,10 +4194,10 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, * nexthop. Since sibling routes are always added at the end of * the list, find the first sibling of the last route appended */ - if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { - rt = list_first_entry(&rt_last->rt6i_siblings, - struct rt6_info, - rt6i_siblings); + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { + rt = list_first_entry(&rt_last->fib6_siblings, + struct fib6_info, + fib6_siblings); } if (rt) @@ -4184,11 +4207,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rt6_info *rt_notif = NULL, *rt_last = NULL; + struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; struct fib6_config r_cfg; struct rtnexthop *rtnh; - struct rt6_info *rt; + struct fib6_info *rt; struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; __u16 nlflags; @@ -4208,7 +4231,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, rtnh = (struct rtnexthop *)cfg->fc_mp; /* Parse a Multipath Entry and build a list (rt6_nh_list) of - * rt6_info structs per nexthop + * fib6_info structs per nexthop */ while (rtnh_ok(rtnh, remaining)) { memcpy(&r_cfg, cfg, sizeof(*cfg)); @@ -4231,18 +4254,19 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, } r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); - rt = ip6_route_info_create(&r_cfg, extack); + rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto cleanup; } - rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; + rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; - err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); + err = ip6_route_info_append(info->nl_net, &rt6_nh_list, + rt, &r_cfg); if (err) { - dst_release_immediate(&rt->dst); + fib6_info_release(rt); goto cleanup; } @@ -4257,14 +4281,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { - rt_last = nh->rt6_info; - err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); + rt_last = nh->fib6_info; + err = __ip6_ins_rt(nh->fib6_info, info, extack); + fib6_info_release(nh->fib6_info); + /* save reference to first route for notification */ if (!rt_notif && !err) - rt_notif = nh->rt6_info; + rt_notif = nh->fib6_info; - /* nh->rt6_info is used or freed at this point, reset to NULL*/ - nh->rt6_info = NULL; + /* nh->fib6_info is used or freed at this point, reset to NULL*/ + nh->fib6_info = NULL; if (err) { if (replace && nhn) ip6_print_replace_route_err(&rt6_nh_list); @@ -4305,9 +4331,8 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->rt6_info) - dst_release_immediate(&nh->rt6_info->dst); - kfree(nh->mxc.mx); + if (nh->fib6_info) + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } @@ -4384,20 +4409,20 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (cfg.fc_mp) return ip6_route_multipath_add(&cfg, extack); else - return ip6_route_add(&cfg, extack); + return ip6_route_add(&cfg, GFP_KERNEL, extack); } -static size_t rt6_nlmsg_size(struct rt6_info *rt) +static size_t rt6_nlmsg_size(struct fib6_info *rt) { int nexthop_len = 0; - if (rt->rt6i_nsiblings) { + if (rt->fib6_nsiblings) { nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->dst.lwtstate); + + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); - nexthop_len *= rt->rt6i_nsiblings; + nexthop_len *= rt->fib6_nsiblings; } return NLMSG_ALIGN(sizeof(struct rtmsg)) @@ -4413,38 +4438,41 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->dst.lwtstate) + + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate) + nexthop_len; } -static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt, unsigned int *flags, bool skip_oif) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; - if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { + if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) { *flags |= RTNH_F_LINKDOWN; - if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) + + rcu_read_lock(); + if (fib6_ignore_linkdown(rt)) *flags |= RTNH_F_DEAD; + rcu_read_unlock(); } - if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) + if (rt->fib6_flags & RTF_GATEWAY) { + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0) goto nla_put_failure; } - *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); - if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) + *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK); + if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && rt->dst.dev && - nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) + if (!skip_oif && rt->fib6_nh.nh_dev && + nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex)) goto nla_put_failure; - if (rt->dst.lwtstate && - lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + if (rt->fib6_nh.nh_lwtstate && + lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0) goto nla_put_failure; return 0; @@ -4454,8 +4482,9 @@ nla_put_failure: } /* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) +static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt) { + const struct net_device *dev = rt->fib6_nh.nh_dev; struct rtnexthop *rtnh; unsigned int flags = 0; @@ -4463,8 +4492,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) if (!rtnh) goto nla_put_failure; - rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; - rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; + rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1; + rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; if (rt6_nexthop_info(skb, rt, &flags, true) < 0) goto nla_put_failure; @@ -4480,16 +4509,16 @@ nla_put_failure: return -EMSGSIZE; } -static int rt6_fill_node(struct net *net, - struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, struct in6_addr *src, +static int rt6_fill_node(struct net *net, struct sk_buff *skb, + struct fib6_info *rt, struct dst_entry *dst, + struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags) { - u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; - long expires; + long expires = 0; + u32 *pmetrics; u32 table; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); @@ -4498,53 +4527,31 @@ static int rt6_fill_node(struct net *net, rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; - rtm->rtm_dst_len = rt->rt6i_dst.plen; - rtm->rtm_src_len = rt->rt6i_src.plen; + rtm->rtm_dst_len = rt->fib6_dst.plen; + rtm->rtm_src_len = rt->fib6_src.plen; rtm->rtm_tos = 0; - if (rt->rt6i_table) - table = rt->rt6i_table->tb6_id; + if (rt->fib6_table) + table = rt->fib6_table->tb6_id; else table = RT6_TABLE_UNSPEC; rtm->rtm_table = table; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; - if (rt->rt6i_flags & RTF_REJECT) { - switch (rt->dst.error) { - case -EINVAL: - rtm->rtm_type = RTN_BLACKHOLE; - break; - case -EACCES: - rtm->rtm_type = RTN_PROHIBIT; - break; - case -EAGAIN: - rtm->rtm_type = RTN_THROW; - break; - default: - rtm->rtm_type = RTN_UNREACHABLE; - break; - } - } - else if (rt->rt6i_flags & RTF_LOCAL) - rtm->rtm_type = RTN_LOCAL; - else if (rt->rt6i_flags & RTF_ANYCAST) - rtm->rtm_type = RTN_ANYCAST; - else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) - rtm->rtm_type = RTN_LOCAL; - else - rtm->rtm_type = RTN_UNICAST; + + rtm->rtm_type = rt->fib6_type; rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - rtm->rtm_protocol = rt->rt6i_protocol; + rtm->rtm_protocol = rt->fib6_protocol; - if (rt->rt6i_flags & RTF_CACHE) + if (rt->fib6_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; - if (dst) { - if (nla_put_in6_addr(skb, RTA_DST, dst)) + if (dest) { + if (nla_put_in6_addr(skb, RTA_DST, dest)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) + if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { @@ -4552,12 +4559,12 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && - nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) + nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr)) goto nla_put_failure; #endif if (iif) { #ifdef CONFIG_IPV6_MROUTE - if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { + if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) { int err = ip6mr_get_route(net, skb, rtm, portid); if (err == 0) @@ -4568,34 +4575,32 @@ static int rt6_fill_node(struct net *net, #endif if (nla_put_u32(skb, RTA_IIF, iif)) goto nla_put_failure; - } else if (dst) { + } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } - if (rt->rt6i_prefsrc.plen) { + if (rt->fib6_prefsrc.plen) { struct in6_addr saddr_buf; - saddr_buf = rt->rt6i_prefsrc.addr; + saddr_buf = rt->fib6_prefsrc.addr; if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } - memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); - if (rt->rt6i_pmtu) - metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; - if (rtnetlink_put_metrics(skb, metrics) < 0) + pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics; + if (rtnetlink_put_metrics(skb, pmetrics) < 0) goto nla_put_failure; - if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) + if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric)) goto nla_put_failure; /* For multipath routes, walk the siblings list and add * each as a nexthop within RTA_MULTIPATH. */ - if (rt->rt6i_nsiblings) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings) { + struct fib6_info *sibling, *next_sibling; struct nlattr *mp; mp = nla_nest_start(skb, RTA_MULTIPATH); @@ -4606,7 +4611,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, rt6i_siblings) { + &rt->fib6_siblings, fib6_siblings) { if (rt6_add_nexthop(skb, sibling) < 0) goto nla_put_failure; } @@ -4617,12 +4622,15 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; + if (rt->fib6_flags & RTF_EXPIRES) { + expires = dst ? dst->expires : rt->expires; + expires -= jiffies; + } - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) + if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) goto nla_put_failure; - if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags))) goto nla_put_failure; @@ -4634,12 +4642,12 @@ nla_put_failure: return -EMSGSIZE; } -int rt6_dump_route(struct rt6_info *rt, void *p_arg) +int rt6_dump_route(struct fib6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; struct net *net = arg->net; - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) return 0; if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { @@ -4647,16 +4655,15 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) /* user wants prefix routes only */ if (rtm->rtm_flags & RTM_F_PREFIX && - !(rt->rt6i_flags & RTF_PREFIX_RT)) { + !(rt->fib6_flags & RTF_PREFIX_RT)) { /* success since this is not a prefix route */ return 1; } } - return rt6_fill_node(net, - arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, - NLM_F_MULTI); + return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, + RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, + arg->cb->nlh->nlmsg_seq, NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, @@ -4753,14 +4760,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } - if (fibmatch && rt->from) { - struct rt6_info *ort = rt->from; - - dst_hold(&ort->dst); - ip6_rt_put(rt); - rt = ort; - } - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); @@ -4770,13 +4769,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, skb_dst_set(skb, &rt->dst); if (fibmatch) - err = rt6_fill_node(net, skb, rt, NULL, NULL, iif, + err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0); else - err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + err = rt6_fill_node(net, skb, rt->from, dst, + &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, + 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -4787,7 +4787,7 @@ errout: return err; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int nlm_flags) { struct sk_buff *skb; @@ -4802,8 +4802,8 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, if (!skb) goto errout; - err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->portid, seq, nlm_flags); + err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, + event, info->portid, seq, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -4828,6 +4828,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, return NOTIFY_OK; if (event == NETDEV_REGISTER) { + net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev; net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -5024,11 +5025,17 @@ static int __net_init ip6_route_net_init(struct net *net) if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) goto out_ip6_dst_ops; + net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template, + sizeof(*net->ipv6.fib6_null_entry), + GFP_KERNEL); + if (!net->ipv6.fib6_null_entry) + goto out_ip6_dst_entries; + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), GFP_KERNEL); if (!net->ipv6.ip6_null_entry) - goto out_ip6_dst_entries; + goto out_fib6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_null_entry->dst, ip6_template_metrics, true); @@ -5075,6 +5082,8 @@ out_ip6_prohibit_entry: out_ip6_null_entry: kfree(net->ipv6.ip6_null_entry); #endif +out_fib6_null_entry: + kfree(net->ipv6.fib6_null_entry); out_ip6_dst_entries: dst_entries_destroy(&net->ipv6.ip6_dst_ops); out_ip6_dst_ops: @@ -5083,6 +5092,7 @@ out_ip6_dst_ops: static void __net_exit ip6_route_net_exit(struct net *net) { + kfree(net->ipv6.fib6_null_entry); kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.ip6_prohibit_entry); @@ -5153,6 +5163,7 @@ void __init ip6_route_init_special_entries(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ + init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 416fe67271a9..2cff209d0fc1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -107,8 +107,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); - xdst->u.rt6.rt6i_metric = rt->rt6i_metric; - xdst->u.rt6.rt6i_node = rt->rt6i_node; xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index f7d47c89d658..2dfb492a7c94 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -697,6 +697,9 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window)) goto prop_msg_full; + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) + goto prop_msg_full; nla_nest_end(msg->skb, prop); @@ -979,12 +982,23 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_TOL]) { b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - tipc_node_apply_tolerance(net, b); + tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL); } if (props[TIPC_NLA_PROP_PRIO]) b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (props[TIPC_NLA_PROP_MTU]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; +#ifdef CONFIG_TIPC_MEDIA_UDP + if (tipc_udp_mtu_bad(nla_get_u32 + (props[TIPC_NLA_PROP_MTU]))) + return -EINVAL; + b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); + tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); +#endif + } } return 0; @@ -1029,6 +1043,9 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window)) goto prop_msg_full; + if (media->type_id == TIPC_MEDIA_TYPE_UDP) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) + goto prop_msg_full; nla_nest_end(msg->skb, prop); nla_nest_end(msg->skb, attrs); @@ -1158,6 +1175,16 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (props[TIPC_NLA_PROP_MTU]) { + if (m->type_id != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; +#ifdef CONFIG_TIPC_MEDIA_UDP + if (tipc_udp_mtu_bad(nla_get_u32 + (props[TIPC_NLA_PROP_MTU]))) + return -EINVAL; + m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); +#endif + } } return 0; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6efcee63a381..394290cbbb1d 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -94,6 +94,8 @@ struct tipc_bearer; * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion + * @mtu: max packet size bearer can support for media type not dependent on + * underlying device MTU * @type_id: TIPC media identifier * @hwaddr_len: TIPC media address len * @name: media name @@ -118,6 +120,7 @@ struct tipc_media { u32 priority; u32 tolerance; u32 window; + u32 mtu; u32 type_id; u32 hwaddr_len; char name[TIPC_MAX_MEDIA_NAME]; diff --git a/net/tipc/node.c b/net/tipc/node.c index c77dd2f3c589..b71e4e376bb9 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1681,7 +1681,8 @@ discard: kfree_skb(skb); } -void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b) +void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, + int prop) { struct tipc_net *tn = tipc_net(net); int bearer_id = b->identity; @@ -1696,8 +1697,13 @@ void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b) list_for_each_entry_rcu(n, &tn->node_list, list) { tipc_node_write_lock(n); e = &n->links[bearer_id]; - if (e->link) - tipc_link_set_tolerance(e->link, b->tolerance, &xmitq); + if (e->link) { + if (prop == TIPC_NLA_PROP_TOL) + tipc_link_set_tolerance(e->link, b->tolerance, + &xmitq); + else if (prop == TIPC_NLA_PROP_MTU) + tipc_link_set_mtu(e->link, b->mtu); + } tipc_node_write_unlock(n); tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr); } diff --git a/net/tipc/node.h b/net/tipc/node.h index f24b83500df1..bb271a37c93f 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -67,7 +67,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); -void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b); +void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index e7d91f5d5cae..9783101bc4a9 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -713,8 +713,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, err = -EINVAL; goto err; } - b->mtu = dev->mtu - sizeof(struct iphdr) - - sizeof(struct udphdr); + b->mtu = b->media->mtu; #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { udp_conf.family = AF_INET6; @@ -803,6 +802,7 @@ struct tipc_media udp_media_info = { .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, + .mtu = TIPC_DEF_LINK_UDP_MTU, .type_id = TIPC_MEDIA_TYPE_UDP, .hwaddr_len = 0, .name = "udp" diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h index 281bbae87726..e7455cc73e16 100644 --- a/net/tipc/udp_media.h +++ b/net/tipc/udp_media.h @@ -38,9 +38,23 @@ #ifndef _TIPC_UDP_MEDIA_H #define _TIPC_UDP_MEDIA_H +#include <linux/ip.h> +#include <linux/udp.h> + int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b); int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb); +/* check if configured MTU is too low for tipc headers */ +static inline bool tipc_udp_mtu_bad(u32 mtu) +{ + if (mtu >= (TIPC_MIN_BEARER_MTU + sizeof(struct iphdr) + + sizeof(struct udphdr))) + return false; + + pr_warn("MTU too low for tipc bearer\n"); + return true; +} + #endif #endif |