From 1952e8e0c5156615e344acbb9317e65f11aa9536 Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Fri, 20 Jun 2014 17:47:17 +0530 Subject: devicetree: add device tree bindings for cc2520 driver DT bindings for the cc2520 radio driver. Signed-off-by: Varka Bhadram Signed-off-by: David S. Miller --- .../devicetree/bindings/net/ieee802154/cc2520.txt | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/ieee802154/cc2520.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt new file mode 100644 index 000000000000..0071883c08d8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt @@ -0,0 +1,29 @@ +*CC2520 IEEE 802.15.4 Compatible Radio* + +Required properties: + - compatible: should be "ti,cc2520" + - spi-max-frequency: maximal bus speed (8000000); should be set to 4000000 depending + on sync or async operation mode + - reg: the chipselect index + - pinctrl-0: pin control group to be used for this controller. + - pinctrl-names: must contain a "default" entry. + - fifo-gpio: GPIO spec for the FIFO pin + - fifop-gpio: GPIO spec for the FIFOP pin + - sfd-gpio: GPIO spec for the SFD pin + - cca-gpio: GPIO spec for the CCA pin + - vreg-gpio: GPIO spec for the VREG pin + - reset-gpio: GPIO spec for the RESET pin +Example: + cc2520@0 { + compatible = "ti,cc2520"; + reg = <0>; + spi-max-frequency = <4000000>; + pinctrl-names = "default"; + pinctrl-0 = <&cc2520_cape_pins>; + fifo-gpio = <&gpio1 18 0>; + fifop-gpio = <&gpio1 19 0>; + sfd-gpio = <&gpio1 13 0>; + cca-gpio = <&gpio1 16 0>; + vreg-gpio = <&gpio0 31 0>; + reset-gpio = <&gpio1 12 0>; + }; -- cgit v1.2.3 From d93331965729850303f6111381c1a4a9e9b8ae5a Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 25 Jun 2014 14:44:53 -0700 Subject: ipv6: Allow accepting RA from local IP addresses. This can be used in virtual networking applications, and may have other uses as well. The option is disabled by default. A specific use case is setting up virtual routers, bridges, and hosts on a single OS without the use of network namespaces or virtual machines. With proper use of ip rules, routing tables, veth interface pairs and/or other virtual interfaces, and applications that can bind to interfaces and/or IP addresses, it is possible to create one or more virtual routers with multiple hosts attached. The host interfaces can act as IPv6 systems, with radvd running on the ports in the virtual routers. With the option provided in this patch enabled, those hosts can now properly obtain IPv6 addresses from radvd. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 12 ++++++++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + include/uapi/linux/sysctl.h | 1 + kernel/sysctl_binary.c | 1 + net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 21 +++++++++++++-------- 7 files changed, 39 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab42c95f9985..10e216c6e05e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1210,6 +1210,18 @@ accept_ra_defrtr - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_from_local - BOOLEAN + Accept RAs with a source address that is found on the local + machine, if the RA is otherwise proper and able to be accepted. + Default is to NOT accept these, as they may indicate an + unintended network loop. + + Functional default: + enabled if accept_ra_from_local is enabled + on a specific interface. + disabled if accept_ra_from_local is disabled + on a specific interface. +
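A minimal usage sketch for the new knob described above (interface names hypothetical, not taken from the patch), enabling local-RA acceptance on the host side of a veth pair attached to a virtual router:

	# ip link add veth-host type veth peer name veth-rtr
	# sysctl -w net.ipv6.conf.veth-host.accept_ra=1
	# sysctl -w net.ipv6.conf.veth-host.accept_ra_from_local=1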
accept_ra_pinfo - BOOLEAN Learn Prefix Information in Router Advertisement. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c811300b0b0c..b0f2452f1d58 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -39,6 +39,7 @@ struct ipv6_devconf { #endif __s32 proxy_ndp; __s32 accept_source_route; + __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; #endif diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 593b0e32d956..efa2666f4b8a 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -163,6 +163,7 @@ enum { DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL, DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL, DEVCONF_SUPPRESS_FRAG_NDISC, + DEVCONF_ACCEPT_RA_FROM_LOCAL, DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 6d6721341f49..43aaba1cc037 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -568,6 +568,7 @@ enum { NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, + NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, __NET_IPV6_MAX }; diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 653cbbd9e7ad..e4ba9a5a5ccb 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -522,6 +522,7 @@ static const struct bin_table bin_net_ipv6_conf_var_table[] = { { CTL_INT, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" }, { CTL_INT, NET_IPV6_PROXY_NDP, "proxy_ndp" }, { CTL_INT, NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" }, + { CTL_INT, NET_IPV6_ACCEPT_RA_FROM_LOCAL, "accept_ra_from_local" }, {} }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5667b3003af9..358edd2272ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -186,6 +186,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -222,6 +223,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -4321,6 +4323,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; + array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; } static inline size_t inet6_ifla6_size(void) @@ -5167,6 +5170,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 736c11c6d266..a845e3d2057e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@
-1148,11 +1148,15 @@ static void ndisc_router_discovery(struct sk_buff *skb) goto skip_defrtr; } - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - NULL, 0)) { + /* Do not accept RA with source-addr found on local machine unless + * accept_ra_from_local is set to true. + */ + if (!(in6_dev->cnf.accept_ra_from_local || + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0))) { ND_PRINTK(2, info, - "RA: %s, chk_addr failed for dev: %s\n", - __func__, skb->dev->name); + "RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; } @@ -1290,11 +1294,12 @@ skip_linkparms: } #ifdef CONFIG_IPV6_ROUTE_INFO - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - NULL, 0)) { + if (!(in6_dev->cnf.accept_ra_from_local || + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0))) { ND_PRINTK(2, info, - "RA: %s, chk-addr (route info) is false for dev: %s\n", - __func__, skb->dev->name); + "RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; } -- cgit v1.2.3 From 9ceb87fceacca86a37f189b84b79797c313b0c03 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 26 Jun 2014 13:16:27 +0200 Subject: pktgen: document tuning for max NIC performance Using pktgen I'm seeing the ixgbe driver "push-back", due to the TX ring running full. Thus, the TX ring is artificially limiting pktgen. (Diagnose via "ethtool -S", look for "tx_restart_queue" or "tx_busy" counters.) With ixgbe, the real reason behind the TX ring running full is that the TX ring is not being cleaned up fast enough. The ixgbe driver combines TX+RX ring cleanups, and the cleanup interval is affected by the ethtool --coalesce setting of parameter "rx-usecs". Do not increase the default NIC TX ring buffer or default cleanup interval. Instead simply document that pktgen needs special NIC tuning for maximum packets-per-second performance. Performance results with pktgen with clone_skb=100000. TX ring size 512 (default), adjusting "rx-usecs": (Single CPU performance, E5-2630, ixgbe) - 3935002 pps - rx-usecs: 1 (irqs: 9346) - 5132350 pps - rx-usecs: 10 (irqs: 99157) - 5375111 pps - rx-usecs: 20 (irqs: 50154) - 5454050 pps - rx-usecs: 30 (irqs: 33872) - 5496320 pps - rx-usecs: 40 (irqs: 26197) - 5502510 pps - rx-usecs: 50 (irqs: 21527) TX ring size adjusting (ethtool -G), "rx-usecs==1" (default): - 3935002 pps - tx-size: 512 - 5354401 pps - tx-size: 768 - 5356847 pps - tx-size: 1024 - 5327595 pps - tx-size: 1536 - 5356779 pps - tx-size: 2048 - 5353438 pps - tx-size: 4096 Notice after commit 6f25cd47d (pktgen: fix xmit test for BQL enabled devices) pktgen uses netif_xmit_frozen_or_drv_stopped() and ignores the BQL "stack" pause (QUEUE_STATE_STACK_XOFF) flag. This allows us to put more pressure on the TX ring buffers. It is the ixgbe_maybe_stop_tx() call that stops the transmits, and pktgen respects this via the call to netif_xmit_frozen_or_drv_stopped(txq).
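To confirm that the TX ring is what limits pktgen (device name hypothetical), the counters named above can be read via ethtool:

	# ethtool -S ethX | egrep "tx_restart_queue|tx_busy"

A steadily increasing counter means the driver is pushing back on transmit.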
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- Documentation/networking/pktgen.txt | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index 0e30c7845b2b..0dffc6e37902 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -24,6 +24,34 @@ For monitoring and control pktgen creates: /proc/net/pktgen/ethX +Tuning NIC for max performance +============================== + +The default NIC settings are (likely) not tuned for pktgen's artificial +overload type of benchmarking, as this could hurt the normal use-case. + +Specifically increasing the TX ring buffer in the NIC: + # ethtool -G ethX tx 1024 + +A larger TX ring can improve pktgen's performance, while it can hurt +in the general case, 1) because the TX ring buffer might get larger +than the CPU's L1/L2 cache, 2) because it allows more queueing in the +NIC HW layer (which is bad for bufferbloat). + +One should be careful about concluding that packets/descriptors in the HW +TX ring cause delay. Drivers usually delay cleaning up the +ring-buffers (for various performance reasons), thus packets stalling +in the TX ring might just be waiting for cleanup. + +This cleanup issue is specifically the case for the ixgbe driver +(Intel 82599 chip). This driver (ixgbe) combines TX+RX ring cleanups, +and the cleanup interval is affected by the ethtool --coalesce setting +of parameter "rx-usecs". + +For ixgbe use e.g. "30", resulting in approx 33K interrupts/sec (1/30*10^6): + # ethtool -C ethX rx-usecs 30 + + Viewing threads =============== /proc/net/pktgen/kpktgend_0 -- cgit v1.2.3 From 83e82f4c706bbca3e2d9d7962e63605cc7a5fbd8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Jul 2014 21:08:40 -0700 Subject: net: systemport: add Wake-on-LAN support Support for Wake-on-LAN using Magic Packet with or without a SecureOn password is implemented by doing the following: - setting the password in the relevant UniMAC registers - flagging the device as a wakeup source for the system, as well as its Wake-on-LAN interrupt - preparing the hardware for entering WoL mode - enabling the MPD interrupt to wake us The Device Tree binding documentation is also updated to specify the third, optional Wake-on-LAN interrupt line. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../bindings/net/broadcom-systemport.txt | 3 +- drivers/net/ethernet/broadcom/bcmsysport.c | 155 ++++++++++++++++++++- drivers/net/ethernet/broadcom/bcmsysport.h | 12 ++ 3 files changed, 166 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/broadcom-systemport.txt b/Documentation/devicetree/bindings/net/broadcom-systemport.txt index c183ea90d9bc..aa7ad622259d 100644 --- a/Documentation/devicetree/bindings/net/broadcom-systemport.txt +++ b/Documentation/devicetree/bindings/net/broadcom-systemport.txt @@ -4,7 +4,8 @@ Required properties: - compatible: should be one of "brcm,systemport-v1.00" or "brcm,systemport" - reg: address and length of the register set for the device. - interrupts: interrupts for the device, first cell must be for the rx - interrupts, and the second cell should be for the transmit queues. An + optional third interrupt cell for Wake-on-LAN can be specified - local-mac-address: Ethernet MAC address (48 bits) of this adapter - phy-mode: Should be a string describing the PHY interface to the Ethernet switch/PHY, see Documentation/devicetree/bindings/net/ethernet.txt
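As a user-space usage sketch (not part of this patch; interface name and password hypothetical), Magic Packet wake-up with a SecureOn password would be armed via ethtool, matching the WAKE_MAGIC and WAKE_MAGICSECURE options handled by the driver code below:

	# ethtool -s eth0 wol gs sopass 11:22:33:44:55:66
	# ethtool eth0 | grep Wake-on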
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f00793e45330..7a1bd2b3bc26 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -384,6 +384,64 @@ static void bcm_sysport_get_stats(struct net_device *dev, } } +static void bcm_sysport_get_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + u32 reg; + + wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE; + wol->wolopts = priv->wolopts; + + if (!(priv->wolopts & WAKE_MAGICSECURE)) + return; + + /* Return the programmed SecureOn password */ + reg = umac_readl(priv, UMAC_PSW_MS); + put_unaligned_be16(reg, &wol->sopass[0]); + reg = umac_readl(priv, UMAC_PSW_LS); + put_unaligned_be32(reg, &wol->sopass[2]); +} + +static int bcm_sysport_set_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + struct device *kdev = &priv->pdev->dev; + u32 supported = WAKE_MAGIC | WAKE_MAGICSECURE; + + if (!device_can_wakeup(kdev)) + return -ENOTSUPP; + + if (wol->wolopts & ~supported) + return -EINVAL; + + /* Program the SecureOn password */ + if (wol->wolopts & WAKE_MAGICSECURE) { + umac_writel(priv, get_unaligned_be16(&wol->sopass[0]), + UMAC_PSW_MS); + umac_writel(priv, get_unaligned_be32(&wol->sopass[2]), + UMAC_PSW_LS); + } + + /* Flag the device and relevant IRQ as wakeup capable */ + if (wol->wolopts) { + device_set_wakeup_enable(kdev, 1); + enable_irq_wake(priv->wol_irq); + priv->wol_irq_disabled = 0; + } else { + device_set_wakeup_enable(kdev, 0); + /* Avoid unbalanced disable_irq_wake calls */ + if (!priv->wol_irq_disabled) + disable_irq_wake(priv->wol_irq); + priv->wol_irq_disabled = 1; + } + + priv->wolopts = wol->wolopts; + + return 0; +} + static void bcm_sysport_free_cb(struct bcm_sysport_cb *cb) { dev_kfree_skb_any(cb->skb); @@ -692,6 +750,20 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) return work_done; } +static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) +{ + u32 reg; + + /* Stop monitoring MPD interrupt */ + intrl2_0_mask_set(priv, INTRL2_0_MPD); + + /* Clear the MagicPacket detection logic */ + reg = umac_readl(priv, UMAC_MPD_CTRL); + reg &= ~MPD_EN; + umac_writel(priv, reg, UMAC_MPD_CTRL); + + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); +} /* RX and misc interrupt routine */ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) @@ -722,6 +794,11 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) bcm_sysport_tx_reclaim_all(priv); + if (priv->irq0_stat & INTRL2_0_MPD) { + netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n"); + bcm_sysport_resume_from_wol(priv); + } + return IRQ_HANDLED; } @@ -757,6 +834,15 @@ static irqreturn_t bcm_sysport_tx_isr(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t bcm_sysport_wol_isr(int irq, void *dev_id) +{ + struct bcm_sysport_priv *priv = dev_id; + + pm_wakeup_event(&priv->pdev->dev, 0); + + return IRQ_HANDLED; +} + static int bcm_sysport_insert_tsb(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *nskb; @@ -1507,6 +1593,8 @@ static struct ethtool_ops
bcm_sysport_ethtool_ops = { .get_strings = bcm_sysport_get_strings, .get_ethtool_stats = bcm_sysport_get_stats, .get_sset_count = bcm_sysport_get_sset_count, + .get_wol = bcm_sysport_get_wol, + .set_wol = bcm_sysport_set_wol, }; static const struct net_device_ops bcm_sysport_netdev_ops = { @@ -1548,6 +1636,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv->irq0 = platform_get_irq(pdev, 0); priv->irq1 = platform_get_irq(pdev, 1); + priv->wol_irq = platform_get_irq(pdev, 2); if (priv->irq0 <= 0 || priv->irq1 <= 0) { dev_err(&pdev->dev, "invalid interrupts\n"); ret = -EINVAL; @@ -1600,6 +1689,13 @@ static int bcm_sysport_probe(struct platform_device *pdev) dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + /* Request the WOL interrupt and advertise suspend if available */ + priv->wol_irq_disabled = 1; + ret = devm_request_irq(&pdev->dev, priv->wol_irq, + bcm_sysport_wol_isr, 0, dev->name, priv); + if (!ret) + device_set_wakeup_capable(&pdev->dev, 1); + /* Set the needed headroom once and for all */ BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8); dev->needed_headroom += sizeof(struct bcm_tsb); @@ -1647,12 +1743,55 @@ static int bcm_sysport_remove(struct platform_device *pdev) } #ifdef CONFIG_PM_SLEEP +static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) +{ + struct net_device *ndev = priv->netdev; + unsigned int timeout = 1000; + u32 reg; + + /* Password has already been programmed */ + reg = umac_readl(priv, UMAC_MPD_CTRL); + reg |= MPD_EN; + reg &= ~PSW_EN; + if (priv->wolopts & WAKE_MAGICSECURE) + reg |= PSW_EN; + umac_writel(priv, reg, UMAC_MPD_CTRL); + + /* Make sure RBUF entered WoL mode as result */ + do { + reg = rbuf_readl(priv, RBUF_STATUS); + if (reg & RBUF_WOL_MODE) + break; + + udelay(10); + } while (timeout-- > 0); + + /* Do not leave the UniMAC RBUF matching only MPD packets */ + if (!timeout) { + reg = umac_readl(priv, UMAC_MPD_CTRL); + reg &= ~MPD_EN; + umac_writel(priv, reg, UMAC_MPD_CTRL); + netif_err(priv, wol, ndev, "failed to enter WOL mode\n"); + return -ETIMEDOUT; + } + + /* UniMAC receive needs to be turned on */ + umac_enable_set(priv, CMD_RX_EN, 1); + + /* Enable the interrupt wake-up source */ + intrl2_0_mask_clear(priv, INTRL2_0_MPD); + + netif_dbg(priv, wol, ndev, "entered WOL mode\n"); + + return 0; +} + static int bcm_sysport_suspend(struct device *d) { struct net_device *dev = dev_get_drvdata(d); struct bcm_sysport_priv *priv = netdev_priv(dev); unsigned int i; - int ret; + int ret = 0; u32 reg; if (!netif_running(dev)) @@ -1681,7 +1820,8 @@ static int bcm_sysport_suspend(struct device *d) } /* Flush RX pipe */ - topctrl_writel(priv, RX_FLUSH, RX_FLUSH_CNTL); + if (!priv->wolopts) + topctrl_writel(priv, RX_FLUSH, RX_FLUSH_CNTL); ret = tdma_enable_set(priv, 0); if (ret) { @@ -1701,7 +1841,11 @@ static int bcm_sysport_suspend(struct device *d) bcm_sysport_fini_tx_ring(priv, i); bcm_sysport_fini_rx_ring(priv); - return 0; + /* Get prepared for Wake-on-LAN */ + if (device_may_wakeup(d) && priv->wolopts) + ret = bcm_sysport_suspend_to_wol(priv); + + return ret; } static int bcm_sysport_resume(struct device *d) @@ -1715,6 +1859,11 @@ static int bcm_sysport_resume(struct device *d) if (!netif_running(dev)) return 0; + /* We may have been suspended and never received a WOL event that + * would turn off MPD detection, take care of that now + */ + bcm_sysport_resume_from_wol(priv); + /* Initialize both hardware and software ring */ for (i = 0; i < dev->num_tx_queues; i++) { ret = 
bcm_sysport_init_tx_ring(priv, i); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 20ea7162478f..b08dab828101 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -246,6 +246,15 @@ struct bcm_rsb { #define MIB_RX_CNT_RST (1 << 0) #define MIB_RUNT_CNT_RST (1 << 1) #define MIB_TX_CNT_RST (1 << 2) + +#define UMAC_MPD_CTRL 0x620 +#define MPD_EN (1 << 0) +#define MSEQ_LEN_SHIFT 16 +#define MSEQ_LEN_MASK 0xff +#define PSW_EN (1 << 27) + +#define UMAC_PSW_MS 0x624 +#define UMAC_PSW_LS 0x628 #define UMAC_MDF_CTRL 0x650 #define UMAC_MDF_ADDR 0x654 @@ -642,6 +651,7 @@ struct bcm_sysport_priv { struct platform_device *pdev; int irq0; int irq1; + int wol_irq; /* Transmit rings */ struct bcm_sysport_tx_ring tx_rings[TDMA_NUM_RINGS]; @@ -668,6 +678,8 @@ struct bcm_sysport_priv { unsigned int tsb_en:1; unsigned int crc_fwd:1; u16 rev; + u32 wolopts; + unsigned int wol_irq_disabled:1; /* MIB related fields */ struct bcm_sysport_mib mib; -- cgit v1.2.3 From cb1ce2ef387b01686469487edd45994872d52d73 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 1 Jul 2014 21:33:10 -0700 Subject: ipv6: Implement automatic flow label generation on transmit Automatically generate flow labels for IPv6 packets on transmit. The flow label is computed based on skb_get_hash. The flow label will only be set automatically when it would otherwise be zero (i.e. the flow label manager hasn't set one). This supports the transmit side functionality of RFC 6438. Added an IPv6 sysctl auto_flowlabels to enable/disable this behavior system wide, and added an IPV6_AUTOFLOWLABEL socket option to enable this functionality per socket. By default, auto flowlabels are disabled to avoid possible conflicts with the flow label manager; however, if this feature proves useful we may want to enable it by default. It should also be noted that FreeBSD has already implemented automatic flow labels (including the sysctl and socket option). In FreeBSD, automatic flow labels default to enabled. Performance impact: Running super_netperf with 200 flows for TCP_RR and UDP_RR for IPv6. Note that in the UDP case, __skb_get_hash will be called for every packet, which explains the slight regression. In the TCP case the hash is saved in the socket, so there is no regression. Automatic flow labels disabled: TCP_RR: 86.53% CPU utilization 127/195/322 90/95/99% latencies 1.40498e+06 tps UDP_RR: 90.70% CPU utilization 118/168/243 90/95/99% latencies 1.50309e+06 tps Automatic flow labels enabled: TCP_RR: 85.90% CPU utilization 128/199/337 90/95/99% latencies 1.40051e+06 UDP_RR: 92.61% CPU utilization 115/164/236 90/95/99% latencies 1.4687e+06
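An illustrative user-space sketch (not part of this patch) of the per-socket option; the option value 64 comes from the uapi change below:

	#include <netinet/in.h>
	#include <sys/socket.h>

	#ifndef IPV6_AUTOFLOWLABEL
	#define IPV6_AUTOFLOWLABEL 64	/* from include/uapi/linux/in6.h */
	#endif

	/* Ask the kernel to stamp a hash-based flow label on this
	 * socket's outgoing packets. */
	static int enable_autoflowlabel(int fd)
	{
		int on = 1;

		return setsockopt(fd, IPPROTO_IPV6, IPV6_AUTOFLOWLABEL,
				  &on, sizeof(on));
	}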
Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 9 +++++++++ include/linux/ipv6.h | 3 ++- include/net/ipv6.h | 20 ++++++++++++++++++++ include/net/netns/ipv6.h | 1 + include/uapi/linux/in6.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/ip6_gre.c | 7 +++++-- net/ipv6/ip6_output.c | 7 +++++-- net/ipv6/ip6_tunnel.c | 3 ++- net/ipv6/ipv6_sockglue.c | 8 ++++++++ net/ipv6/sysctl_net_ipv6.c | 8 ++++++++ 11 files changed, 62 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 10e216c6e05e..f35bfe43bf7a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN FALSE: disabled Default: TRUE +auto_flowlabels - BOOLEAN + Automatically generate flow labels based on a flow hash + of the packet. This allows intermediate devices, such as routers, + to identify packet flows for mechanisms like Equal Cost Multipath + Routing (see RFC 6438). + TRUE: enabled + FALSE: disabled + Default: FALSE + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5dc68c3ebcbd..ff560537dd61 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -199,7 +199,8 @@ struct ipv6_pinfo { * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1; + dontfrag:1, + autoflowlabel:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2aa86e1135a1..4308f2ada8b3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -699,6 +699,26 @@ static inline void ip6_set_txhash(struct sock *sk) sk->sk_txhash = flow_hash_from_keys(&keys); } +static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, + __be32 flowlabel, bool autolabel) +{ + if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { + __be32 hash; + + hash = skb_get_hash(skb); + + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize the possibility that any useful information + * is leaked to an attacker. Only the lower 20 bits are relevant. + */ + hash ^= hash >> 12; + + flowlabel = hash & IPV6_FLOWLABEL_MASK; + } + + return flowlabel; +}
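A worked example of the folding above (host byte order for clarity; the kernel operates on a __be32 mask, so the in-kernel bit layout differs on little-endian hosts):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t hash = 0x9e3779b9;	/* sample skb_get_hash() value */

		hash ^= hash >> 12;		/* obfuscate the raw hash */
		/* keep only the 20 flow-label bits: prints 0xe9ace */
		printf("flow label: 0x%05x\n", hash & 0xFFFFF);
		return 0;
	}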
+ /* * Header manipulation */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 19d3446e59d2..eade27adecf3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int flowlabel_consistency; + int auto_flowlabels; int icmpv6_time; int anycast_src_echo_reply; int fwmark_reflect; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 0d8e0f0342dc..22b7a69619d8 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -233,6 +233,7 @@ struct in6_flowlabel_req { #if 0 /* not yet */ #define IPV6_USE_MIN_MTU 63 #endif +#define IPV6_AUTOFLOWLABEL 64 /* * Netfilter (1) */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a426cd7099bb..2daa3a133e49 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; + net->ipv6.sysctl.auto_flowlabels = 0; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3873181ed856..365b2b6f3942 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. */ ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); + ip6_flow_hdr(ipv6h, 0, + ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cb9df0eb4023..fa83bdd4c3dd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - ip6_flow_hdr(hdr, tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, np->cork.tclass, + ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index afa082458360..51a1eb185ea7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0,
dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index cc34f65179e4..b50b9e54cf53 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -834,6 +834,10 @@ pref_skip_coa: np->dontfrag = valbool; retv = 0; break; + case IPV6_AUTOFLOWLABEL: + np->autoflowlabel = valbool; + retv = 0; + break; } release_sock(sk); @@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->dontfrag; break; + case IPV6_AUTOFLOWLABEL: + val = np->autoflowlabel; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 058f3eca2e53..5bf7b61f8ae8 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "auto_flowlabels", + .data = &init_net.ipv6.sysctl.auto_flowlabels, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, @@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; + ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) -- cgit v1.2.3 From 3f518509dedc99f0b755d2ce68d24f610e3a005a Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Thu, 10 Jul 2014 16:52:13 -0300 Subject: ethernet: Add new driver for Marvell Armada 375 network unit This commit adds a new network driver for the network controller in the Marvell Armada 375 SoC. Given that the controller is very different from the ones in the other Marvell SoCs that use the mv643xx_eth (Kirkwood, Orion, Discovery) and mvneta (Armada 370/38x/XP) drivers, a new driver is needed. Signed-off-by: Marcin Wojtas [Ezequiel: coding style cleanup] Signed-off-by: Ezequiel Garcia Signed-off-by: David S. Miller --- .../devicetree/bindings/net/marvell-pp2.txt | 61 + drivers/net/ethernet/marvell/Kconfig | 8 + drivers/net/ethernet/marvell/Makefile | 1 + drivers/net/ethernet/marvell/mvpp2.c | 6393 ++++++++++++++++++++ 4 files changed, 6463 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/marvell-pp2.txt create mode 100644 drivers/net/ethernet/marvell/mvpp2.c (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt new file mode 100644 index 000000000000..aa4f4230bfd7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt @@ -0,0 +1,61 @@ +* Marvell Armada 375 Ethernet Controller (PPv2) + +Required properties: + +- compatible: should be "marvell,armada-375-pp2" +- reg: addresses and length of the register sets for the device. + Must contain the following register sets: + - common controller registers + - LMS registers + In addition, at least one port register set is required. +- clocks: a list of references to the clocks for this device, namely: + - main controller clock + - GOP clock +- clock-names: names of used clocks, must be "pp_clk" and "gop_clk". + +The ethernet ports are represented by subnodes. At least one port is +required.
+ +Required properties (port): + +- interrupts: interrupt for the port +- port-id: should be '0' or '1' for ethernet ports, and '2' for the + loopback port +- phy-mode: See ethernet.txt file in the same directory + +Optional properties (port): + +- marvell,loopback: port is in loopback mode +- phy: a phandle to a phy node defining the PHY address (as the reg + property, a single integer). Note: if this property isn't present, + then fixed link is assumed, and the 'fixed-link' property is + mandatory. + +Example: + +ethernet@f0000 { + compatible = "marvell,armada-375-pp2"; + reg = <0xf0000 0xa000>, + <0xc0000 0x3060>, + <0xc4000 0x100>, + <0xc5000 0x100>; + clocks = <&gateclk 3>, <&gateclk 19>; + clock-names = "pp_clk", "gop_clk"; + status = "okay"; + + eth0: eth0@c4000 { + interrupts = <...>; + port-id = <0>; + status = "okay"; + phy = <&phy0>; + phy-mode = "gmii"; + }; + + eth1: eth1@c5000 { + interrupts = <...>; + port-id = <1>; + status = "okay"; + phy = <&phy3>; + phy-mode = "gmii"; + }; +}; diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 68e6a6613e9a..1b4fc7c639e6 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -54,6 +54,14 @@ config MVNETA driver, which should be used for the older Marvell SoCs (Dove, Orion, Discovery, Kirkwood). +config MVPP2 + tristate "Marvell Armada 375 network interface support" + depends on MACH_ARMADA_375 + select MVMDIO + ---help--- + This driver supports the network interface units in the + Marvell ARMADA 375 SoC. + config PXA168_ETH tristate "Marvell pxa168 ethernet support" depends on CPU_PXA168 diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile index 5c4a7765ff0e..f6425bd2884b 100644 --- a/drivers/net/ethernet/marvell/Makefile +++ b/drivers/net/ethernet/marvell/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_MVMDIO) += mvmdio.o obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o obj-$(CONFIG_MVNETA) += mvneta.o +obj-$(CONFIG_MVPP2) += mvpp2.o obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_SKGE) += skge.o obj-$(CONFIG_SKY2) += sky2.o
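A hedged sketch of the fixed-link case mentioned in the binding above (values hypothetical; interrupt specifier elided), for a port wired straight to a switch with no PHY, following the generic fixed-link binding:

	eth1: eth1@c5000 {
		interrupts = <...>;
		port-id = <1>;
		phy-mode = "gmii";
		fixed-link {
			speed = <1000>;
			full-duplex;
		};
	};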
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c new file mode 100644 index 000000000000..9463ede32e6a --- /dev/null +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -0,0 +1,6393 @@ +/* + * Driver for Marvell PPv2 network controller for Armada 375 SoC. + * + * Copyright (C) 2014 Marvell + * + * Marcin Wojtas + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/platform_device.h> +#include <linux/skbuff.h> +#include <linux/inetdevice.h> +#include <linux/mbus.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/cpumask.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/of_address.h> +#include <linux/phy.h> +#include <linux/clk.h> +#include <uapi/linux/ppp_defs.h> +#include <net/ip.h> +#include <net/ipv6.h> + +/* RX Fifo Registers */ +#define MVPP2_RX_DATA_FIFO_SIZE_REG(port) (0x00 + 4 * (port)) +#define MVPP2_RX_ATTR_FIFO_SIZE_REG(port) (0x20 + 4 * (port)) +#define MVPP2_RX_MIN_PKT_SIZE_REG 0x60 +#define MVPP2_RX_FIFO_INIT_REG 0x64 + +/* RX DMA Top Registers */ +#define MVPP2_RX_CTRL_REG(port) (0x140 + 4 * (port)) +#define MVPP2_RX_LOW_LATENCY_PKT_SIZE(s) (((s) & 0xfff) << 16) +#define MVPP2_RX_USE_PSEUDO_FOR_CSUM_MASK BIT(31) +#define MVPP2_POOL_BUF_SIZE_REG(pool) (0x180 + 4 * (pool)) +#define MVPP2_POOL_BUF_SIZE_OFFSET 5 +#define MVPP2_RXQ_CONFIG_REG(rxq) (0x800 + 4 * (rxq)) +#define MVPP2_SNOOP_PKT_SIZE_MASK 0x1ff +#define MVPP2_SNOOP_BUF_HDR_MASK BIT(9) +#define MVPP2_RXQ_POOL_SHORT_OFFS 20 +#define MVPP2_RXQ_POOL_SHORT_MASK 0x700000 +#define MVPP2_RXQ_POOL_LONG_OFFS 24 +#define MVPP2_RXQ_POOL_LONG_MASK 0x7000000 +#define MVPP2_RXQ_PACKET_OFFSET_OFFS 28 +#define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000 +#define MVPP2_RXQ_DISABLE_MASK BIT(31) + +/* Parser Registers */ +#define MVPP2_PRS_INIT_LOOKUP_REG 0x1000 +#define MVPP2_PRS_PORT_LU_MAX 0xf +#define MVPP2_PRS_PORT_LU_MASK(port) (0xff << ((port) * 4)) +#define MVPP2_PRS_PORT_LU_VAL(port, val) ((val) << ((port) * 4)) +#define MVPP2_PRS_INIT_OFFS_REG(port) (0x1004 + ((port) & 4)) +#define MVPP2_PRS_INIT_OFF_MASK(port) (0x3f << (((port) % 4) * 8)) +#define MVPP2_PRS_INIT_OFF_VAL(port, val) ((val) << (((port) % 4) * 8)) +#define MVPP2_PRS_MAX_LOOP_REG(port) (0x100c + ((port) & 4)) +#define MVPP2_PRS_MAX_LOOP_MASK(port) (0xff << (((port) % 4) * 8)) +#define MVPP2_PRS_MAX_LOOP_VAL(port, val) ((val) << (((port) % 4) * 8)) +#define MVPP2_PRS_TCAM_IDX_REG 0x1100 +#define MVPP2_PRS_TCAM_DATA_REG(idx) (0x1104 + (idx) * 4) +#define MVPP2_PRS_TCAM_INV_MASK BIT(31) +#define MVPP2_PRS_SRAM_IDX_REG 0x1200 +#define MVPP2_PRS_SRAM_DATA_REG(idx) (0x1204 + (idx) * 4) +#define MVPP2_PRS_TCAM_CTRL_REG 0x1230 +#define MVPP2_PRS_TCAM_EN_MASK BIT(0) + +/* Classifier Registers */ +#define MVPP2_CLS_MODE_REG 0x1800 +#define MVPP2_CLS_MODE_ACTIVE_MASK BIT(0) +#define MVPP2_CLS_PORT_WAY_REG 0x1810 +#define MVPP2_CLS_PORT_WAY_MASK(port) (1 << (port)) +#define MVPP2_CLS_LKP_INDEX_REG 0x1814 +#define MVPP2_CLS_LKP_INDEX_WAY_OFFS 6 +#define MVPP2_CLS_LKP_TBL_REG 0x1818 +#define MVPP2_CLS_LKP_TBL_RXQ_MASK 0xff +#define MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK BIT(25) +#define MVPP2_CLS_FLOW_INDEX_REG 0x1820 +#define MVPP2_CLS_FLOW_TBL0_REG 0x1824 +#define MVPP2_CLS_FLOW_TBL1_REG 0x1828 +#define MVPP2_CLS_FLOW_TBL2_REG 0x182c +#define MVPP2_CLS_OVERSIZE_RXQ_LOW_REG(port) (0x1980 + ((port) * 4)) +#define MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS 3 +#define MVPP2_CLS_OVERSIZE_RXQ_LOW_MASK 0x7 +#define MVPP2_CLS_SWFWD_P2HQ_REG(port) (0x19b0 + ((port) * 4)) +#define MVPP2_CLS_SWFWD_PCTRL_REG 0x19d0 +#define MVPP2_CLS_SWFWD_PCTRL_MASK(port) (1 << (port)) + +/* Descriptor Manager Top Registers */ +#define MVPP2_RXQ_NUM_REG 0x2040 +#define MVPP2_RXQ_DESC_ADDR_REG 0x2044 +#define MVPP2_RXQ_DESC_SIZE_REG 0x2048 +#define MVPP2_RXQ_DESC_SIZE_MASK 0x3ff0 +#define MVPP2_RXQ_STATUS_UPDATE_REG(rxq) (0x3000 + 4 * (rxq)) +#define MVPP2_RXQ_NUM_PROCESSED_OFFSET 0 +#define MVPP2_RXQ_NUM_NEW_OFFSET 16 +#define MVPP2_RXQ_STATUS_REG(rxq) (0x3400 + 4 * (rxq)) +#define MVPP2_RXQ_OCCUPIED_MASK 0x3fff +#define MVPP2_RXQ_NON_OCCUPIED_OFFSET 16 +#define MVPP2_RXQ_NON_OCCUPIED_MASK 0x3fff0000 +#define
MVPP2_RXQ_THRESH_REG 0x204c +#define MVPP2_OCCUPIED_THRESH_OFFSET 0 +#define MVPP2_OCCUPIED_THRESH_MASK 0x3fff +#define MVPP2_RXQ_INDEX_REG 0x2050 +#define MVPP2_TXQ_NUM_REG 0x2080 +#define MVPP2_TXQ_DESC_ADDR_REG 0x2084 +#define MVPP2_TXQ_DESC_SIZE_REG 0x2088 +#define MVPP2_TXQ_DESC_SIZE_MASK 0x3ff0 +#define MVPP2_AGGR_TXQ_UPDATE_REG 0x2090 +#define MVPP2_TXQ_THRESH_REG 0x2094 +#define MVPP2_TRANSMITTED_THRESH_OFFSET 16 +#define MVPP2_TRANSMITTED_THRESH_MASK 0x3fff0000 +#define MVPP2_TXQ_INDEX_REG 0x2098 +#define MVPP2_TXQ_PREF_BUF_REG 0x209c +#define MVPP2_PREF_BUF_PTR(desc) ((desc) & 0xfff) +#define MVPP2_PREF_BUF_SIZE_4 (BIT(12) | BIT(13)) +#define MVPP2_PREF_BUF_SIZE_16 (BIT(12) | BIT(14)) +#define MVPP2_PREF_BUF_THRESH(val) ((val) << 17) +#define MVPP2_TXQ_DRAIN_EN_MASK BIT(31) +#define MVPP2_TXQ_PENDING_REG 0x20a0 +#define MVPP2_TXQ_PENDING_MASK 0x3fff +#define MVPP2_TXQ_INT_STATUS_REG 0x20a4 +#define MVPP2_TXQ_SENT_REG(txq) (0x3c00 + 4 * (txq)) +#define MVPP2_TRANSMITTED_COUNT_OFFSET 16 +#define MVPP2_TRANSMITTED_COUNT_MASK 0x3fff0000 +#define MVPP2_TXQ_RSVD_REQ_REG 0x20b0 +#define MVPP2_TXQ_RSVD_REQ_Q_OFFSET 16 +#define MVPP2_TXQ_RSVD_RSLT_REG 0x20b4 +#define MVPP2_TXQ_RSVD_RSLT_MASK 0x3fff +#define MVPP2_TXQ_RSVD_CLR_REG 0x20b8 +#define MVPP2_TXQ_RSVD_CLR_OFFSET 16 +#define MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu) (0x2100 + 4 * (cpu)) +#define MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu) (0x2140 + 4 * (cpu)) +#define MVPP2_AGGR_TXQ_DESC_SIZE_MASK 0x3ff0 +#define MVPP2_AGGR_TXQ_STATUS_REG(cpu) (0x2180 + 4 * (cpu)) +#define MVPP2_AGGR_TXQ_PENDING_MASK 0x3fff +#define MVPP2_AGGR_TXQ_INDEX_REG(cpu) (0x21c0 + 4 * (cpu)) + +/* MBUS bridge registers */ +#define MVPP2_WIN_BASE(w) (0x4000 + ((w) << 2)) +#define MVPP2_WIN_SIZE(w) (0x4020 + ((w) << 2)) +#define MVPP2_WIN_REMAP(w) (0x4040 + ((w) << 2)) +#define MVPP2_BASE_ADDR_ENABLE 0x4060 + +/* Interrupt Cause and Mask registers */ +#define MVPP2_ISR_RX_THRESHOLD_REG(rxq) (0x5200 + 4 * (rxq)) +#define MVPP2_ISR_RXQ_GROUP_REG(rxq) (0x5400 + 4 * (rxq)) +#define MVPP2_ISR_ENABLE_REG(port) (0x5420 + 4 * (port)) +#define MVPP2_ISR_ENABLE_INTERRUPT(mask) ((mask) & 0xffff) +#define MVPP2_ISR_DISABLE_INTERRUPT(mask) (((mask) << 16) & 0xffff0000) +#define MVPP2_ISR_RX_TX_CAUSE_REG(port) (0x5480 + 4 * (port)) +#define MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK 0xffff +#define MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK 0xff0000 +#define MVPP2_CAUSE_RX_FIFO_OVERRUN_MASK BIT(24) +#define MVPP2_CAUSE_FCS_ERR_MASK BIT(25) +#define MVPP2_CAUSE_TX_FIFO_UNDERRUN_MASK BIT(26) +#define MVPP2_CAUSE_TX_EXCEPTION_SUM_MASK BIT(29) +#define MVPP2_CAUSE_RX_EXCEPTION_SUM_MASK BIT(30) +#define MVPP2_CAUSE_MISC_SUM_MASK BIT(31) +#define MVPP2_ISR_RX_TX_MASK_REG(port) (0x54a0 + 4 * (port)) +#define MVPP2_ISR_PON_RX_TX_MASK_REG 0x54bc +#define MVPP2_PON_CAUSE_RXQ_OCCUP_DESC_ALL_MASK 0xffff +#define MVPP2_PON_CAUSE_TXP_OCCUP_DESC_ALL_MASK 0x3fc00000 +#define MVPP2_PON_CAUSE_MISC_SUM_MASK BIT(31) +#define MVPP2_ISR_MISC_CAUSE_REG 0x55b0 + +/* Buffer Manager registers */ +#define MVPP2_BM_POOL_BASE_REG(pool) (0x6000 + ((pool) * 4)) +#define MVPP2_BM_POOL_BASE_ADDR_MASK 0xfffff80 +#define MVPP2_BM_POOL_SIZE_REG(pool) (0x6040 + ((pool) * 4)) +#define MVPP2_BM_POOL_SIZE_MASK 0xfff0 +#define MVPP2_BM_POOL_READ_PTR_REG(pool) (0x6080 + ((pool) * 4)) +#define MVPP2_BM_POOL_GET_READ_PTR_MASK 0xfff0 +#define MVPP2_BM_POOL_PTRS_NUM_REG(pool) (0x60c0 + ((pool) * 4)) +#define MVPP2_BM_POOL_PTRS_NUM_MASK 0xfff0 +#define MVPP2_BM_BPPI_READ_PTR_REG(pool) (0x6100 + ((pool) * 4)) +#define MVPP2_BM_BPPI_PTRS_NUM_REG(pool) 
(0x6140 + ((pool) * 4)) +#define MVPP2_BM_BPPI_PTR_NUM_MASK 0x7ff +#define MVPP2_BM_BPPI_PREFETCH_FULL_MASK BIT(16) +#define MVPP2_BM_POOL_CTRL_REG(pool) (0x6200 + ((pool) * 4)) +#define MVPP2_BM_START_MASK BIT(0) +#define MVPP2_BM_STOP_MASK BIT(1) +#define MVPP2_BM_STATE_MASK BIT(4) +#define MVPP2_BM_LOW_THRESH_OFFS 8 +#define MVPP2_BM_LOW_THRESH_MASK 0x7f00 +#define MVPP2_BM_LOW_THRESH_VALUE(val) ((val) << \ + MVPP2_BM_LOW_THRESH_OFFS) +#define MVPP2_BM_HIGH_THRESH_OFFS 16 +#define MVPP2_BM_HIGH_THRESH_MASK 0x7f0000 +#define MVPP2_BM_HIGH_THRESH_VALUE(val) ((val) << \ + MVPP2_BM_HIGH_THRESH_OFFS) +#define MVPP2_BM_INTR_CAUSE_REG(pool) (0x6240 + ((pool) * 4)) +#define MVPP2_BM_RELEASED_DELAY_MASK BIT(0) +#define MVPP2_BM_ALLOC_FAILED_MASK BIT(1) +#define MVPP2_BM_BPPE_EMPTY_MASK BIT(2) +#define MVPP2_BM_BPPE_FULL_MASK BIT(3) +#define MVPP2_BM_AVAILABLE_BP_LOW_MASK BIT(4) +#define MVPP2_BM_INTR_MASK_REG(pool) (0x6280 + ((pool) * 4)) +#define MVPP2_BM_PHY_ALLOC_REG(pool) (0x6400 + ((pool) * 4)) +#define MVPP2_BM_PHY_ALLOC_GRNTD_MASK BIT(0) +#define MVPP2_BM_VIRT_ALLOC_REG 0x6440 +#define MVPP2_BM_PHY_RLS_REG(pool) (0x6480 + ((pool) * 4)) +#define MVPP2_BM_PHY_RLS_MC_BUFF_MASK BIT(0) +#define MVPP2_BM_PHY_RLS_PRIO_EN_MASK BIT(1) +#define MVPP2_BM_PHY_RLS_GRNTD_MASK BIT(2) +#define MVPP2_BM_VIRT_RLS_REG 0x64c0 +#define MVPP2_BM_MC_RLS_REG 0x64c4 +#define MVPP2_BM_MC_ID_MASK 0xfff +#define MVPP2_BM_FORCE_RELEASE_MASK BIT(12) + +/* TX Scheduler registers */ +#define MVPP2_TXP_SCHED_PORT_INDEX_REG 0x8000 +#define MVPP2_TXP_SCHED_Q_CMD_REG 0x8004 +#define MVPP2_TXP_SCHED_ENQ_MASK 0xff +#define MVPP2_TXP_SCHED_DISQ_OFFSET 8 +#define MVPP2_TXP_SCHED_CMD_1_REG 0x8010 +#define MVPP2_TXP_SCHED_PERIOD_REG 0x8018 +#define MVPP2_TXP_SCHED_MTU_REG 0x801c +#define MVPP2_TXP_MTU_MAX 0x7FFFF +#define MVPP2_TXP_SCHED_REFILL_REG 0x8020 +#define MVPP2_TXP_REFILL_TOKENS_ALL_MASK 0x7ffff +#define MVPP2_TXP_REFILL_PERIOD_ALL_MASK 0x3ff00000 +#define MVPP2_TXP_REFILL_PERIOD_MASK(v) ((v) << 20) +#define MVPP2_TXP_SCHED_TOKEN_SIZE_REG 0x8024 +#define MVPP2_TXP_TOKEN_SIZE_MAX 0xffffffff +#define MVPP2_TXQ_SCHED_REFILL_REG(q) (0x8040 + ((q) << 2)) +#define MVPP2_TXQ_REFILL_TOKENS_ALL_MASK 0x7ffff +#define MVPP2_TXQ_REFILL_PERIOD_ALL_MASK 0x3ff00000 +#define MVPP2_TXQ_REFILL_PERIOD_MASK(v) ((v) << 20) +#define MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(q) (0x8060 + ((q) << 2)) +#define MVPP2_TXQ_TOKEN_SIZE_MAX 0x7fffffff +#define MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(q) (0x8080 + ((q) << 2)) +#define MVPP2_TXQ_TOKEN_CNTR_MAX 0xffffffff + +/* TX general registers */ +#define MVPP2_TX_SNOOP_REG 0x8800 +#define MVPP2_TX_PORT_FLUSH_REG 0x8810 +#define MVPP2_TX_PORT_FLUSH_MASK(port) (1 << (port)) + +/* LMS registers */ +#define MVPP2_SRC_ADDR_MIDDLE 0x24 +#define MVPP2_SRC_ADDR_HIGH 0x28 +#define MVPP2_MIB_COUNTERS_BASE(port) (0x1000 + ((port) >> 1) * \ + 0x400 + (port) * 0x400) +#define MVPP2_MIB_LATE_COLLISION 0x7c +#define MVPP2_ISR_SUM_MASK_REG 0x220c +#define MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG 0x305c +#define MVPP2_EXT_GLOBAL_CTRL_DEFAULT 0x27 + +/* Per-port registers */ +#define MVPP2_GMAC_CTRL_0_REG 0x0 +#define MVPP2_GMAC_PORT_EN_MASK BIT(0) +#define MVPP2_GMAC_MAX_RX_SIZE_OFFS 2 +#define MVPP2_GMAC_MAX_RX_SIZE_MASK 0x7ffc +#define MVPP2_GMAC_MIB_CNTR_EN_MASK BIT(15) +#define MVPP2_GMAC_CTRL_1_REG 0x4 +#define MVPP2_GMAC_PERIODIC_XON_EN_MASK BIT(0) +#define MVPP2_GMAC_GMII_LB_EN_MASK BIT(5) +#define MVPP2_GMAC_PCS_LB_EN_BIT 6 +#define MVPP2_GMAC_PCS_LB_EN_MASK BIT(6) +#define MVPP2_GMAC_SA_LOW_OFFS 7 +#define MVPP2_GMAC_CTRL_2_REG 0x8 
+#define MVPP2_GMAC_INBAND_AN_MASK BIT(0) +#define MVPP2_GMAC_PCS_ENABLE_MASK BIT(3) +#define MVPP2_GMAC_PORT_RGMII_MASK BIT(4) +#define MVPP2_GMAC_PORT_RESET_MASK BIT(6) +#define MVPP2_GMAC_AUTONEG_CONFIG 0xc +#define MVPP2_GMAC_FORCE_LINK_DOWN BIT(0) +#define MVPP2_GMAC_FORCE_LINK_PASS BIT(1) +#define MVPP2_GMAC_CONFIG_MII_SPEED BIT(5) +#define MVPP2_GMAC_CONFIG_GMII_SPEED BIT(6) +#define MVPP2_GMAC_AN_SPEED_EN BIT(7) +#define MVPP2_GMAC_CONFIG_FULL_DUPLEX BIT(12) +#define MVPP2_GMAC_AN_DUPLEX_EN BIT(13) +#define MVPP2_GMAC_PORT_FIFO_CFG_1_REG 0x1c +#define MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS 6 +#define MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK 0x1fc0 +#define MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v) (((v) << 6) & \ + MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK) + +#define MVPP2_CAUSE_TXQ_SENT_DESC_ALL_MASK 0xff + +/* Descriptor ring Macros */ +#define MVPP2_QUEUE_NEXT_DESC(q, index) \ + (((index) < (q)->last_desc) ? ((index) + 1) : 0) + +/* Various constants */ + +/* Coalescing */ +#define MVPP2_TXDONE_COAL_PKTS_THRESH 15 +#define MVPP2_RX_COAL_PKTS 32 +#define MVPP2_RX_COAL_USEC 100 + +/* The two bytes Marvell header. Either contains a special value used + * by Marvell switches when a specific hardware mode is enabled (not + * supported by this driver) or is filled automatically with zeroes on + * the RX side. Those two bytes being at the front of the Ethernet + * header, they automatically allow the IP header to be aligned on a + * 4-byte boundary: the hardware skips those two bytes on its + * own. + */ +#define MVPP2_MH_SIZE 2 +#define MVPP2_ETH_TYPE_LEN 2 +#define MVPP2_PPPOE_HDR_SIZE 8 +#define MVPP2_VLAN_TAG_LEN 4 + +/* Lbtd 802.3 type */ +#define MVPP2_IP_LBDT_TYPE 0xfffa + +#define MVPP2_CPU_D_CACHE_LINE_SIZE 32 +#define MVPP2_TX_CSUM_MAX_SIZE 9800 + +/* Timeout constants */ +#define MVPP2_TX_DISABLE_TIMEOUT_MSEC 1000 +#define MVPP2_TX_PENDING_TIMEOUT_MSEC 1000 + +#define MVPP2_TX_MTU_MAX 0x7ffff + +/* Maximum number of T-CONTs of PON port */ +#define MVPP2_MAX_TCONT 16 + +/* Maximum number of supported ports */ +#define MVPP2_MAX_PORTS 4 + +/* Maximum number of TXQs used by single port */ +#define MVPP2_MAX_TXQ 8 + +/* Maximum number of RXQs used by single port */ +#define MVPP2_MAX_RXQ 8 + +/* Default number of RXQs in use */ +#define MVPP2_DEFAULT_RXQ 4 + +/* Total number of RXQs available to all ports */ +#define MVPP2_RXQ_TOTAL_NUM (MVPP2_MAX_PORTS * MVPP2_MAX_RXQ) + +/* Max number of Rx descriptors */ +#define MVPP2_MAX_RXD 128 + +/* Max number of Tx descriptors */ +#define MVPP2_MAX_TXD 1024 + +/* Amount of Tx descriptors that can be reserved at once by CPU */ +#define MVPP2_CPU_DESC_CHUNK 64 + +/* Max number of Tx descriptors in each aggregated queue */ +#define MVPP2_AGGR_TXQ_SIZE 256 + +/* Descriptor aligned size */ +#define MVPP2_DESC_ALIGNED_SIZE 32 + +/* Descriptor alignment mask */ +#define MVPP2_TX_DESC_ALIGN (MVPP2_DESC_ALIGNED_SIZE - 1) + +/* RX FIFO constants */ +#define MVPP2_RX_FIFO_PORT_DATA_SIZE 0x2000 +#define MVPP2_RX_FIFO_PORT_ATTR_SIZE 0x80 +#define MVPP2_RX_FIFO_PORT_MIN_PKT 0x80 + +/* RX buffer constants */ +#define MVPP2_SKB_SHINFO_SIZE \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + +#define MVPP2_RX_PKT_SIZE(mtu) \ + ALIGN((mtu) + MVPP2_MH_SIZE + MVPP2_VLAN_TAG_LEN + \ + ETH_HLEN + ETH_FCS_LEN, MVPP2_CPU_D_CACHE_LINE_SIZE) + +#define MVPP2_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD) +#define MVPP2_RX_TOTAL_SIZE(buf_size) ((buf_size) + MVPP2_SKB_SHINFO_SIZE) +#define MVPP2_RX_MAX_PKT_SIZE(total_size) \ + ((total_size) - NET_SKB_PAD - MVPP2_SKB_SHINFO_SIZE) + +#define
MVPP2_BIT_TO_BYTE(bit) ((bit) / 8) + +/* IPv6 max L3 address size */ +#define MVPP2_MAX_L3_ADDR_SIZE 16 + +/* Port flags */ +#define MVPP2_F_LOOPBACK BIT(0) + +/* Marvell tag types */ +enum mvpp2_tag_type { + MVPP2_TAG_TYPE_NONE = 0, + MVPP2_TAG_TYPE_MH = 1, + MVPP2_TAG_TYPE_DSA = 2, + MVPP2_TAG_TYPE_EDSA = 3, + MVPP2_TAG_TYPE_VLAN = 4, + MVPP2_TAG_TYPE_LAST = 5 +}; + +/* Parser constants */ +#define MVPP2_PRS_TCAM_SRAM_SIZE 256 +#define MVPP2_PRS_TCAM_WORDS 6 +#define MVPP2_PRS_SRAM_WORDS 4 +#define MVPP2_PRS_FLOW_ID_SIZE 64 +#define MVPP2_PRS_FLOW_ID_MASK 0x3f +#define MVPP2_PRS_TCAM_ENTRY_INVALID 1 +#define MVPP2_PRS_TCAM_DSA_TAGGED_BIT BIT(5) +#define MVPP2_PRS_IPV4_HEAD 0x40 +#define MVPP2_PRS_IPV4_HEAD_MASK 0xf0 +#define MVPP2_PRS_IPV4_MC 0xe0 +#define MVPP2_PRS_IPV4_MC_MASK 0xf0 +#define MVPP2_PRS_IPV4_BC_MASK 0xff +#define MVPP2_PRS_IPV4_IHL 0x5 +#define MVPP2_PRS_IPV4_IHL_MASK 0xf +#define MVPP2_PRS_IPV6_MC 0xff +#define MVPP2_PRS_IPV6_MC_MASK 0xff +#define MVPP2_PRS_IPV6_HOP_MASK 0xff +#define MVPP2_PRS_TCAM_PROTO_MASK 0xff +#define MVPP2_PRS_TCAM_PROTO_MASK_L 0x3f +#define MVPP2_PRS_DBL_VLANS_MAX 100 + +/* Tcam structure: + * - lookup ID - 4 bits + * - port ID - 1 byte + * - additional information - 1 byte + * - header data - 8 bytes + * The fields are represented by MVPP2_PRS_TCAM_DATA_REG(5)->(0). + */ +#define MVPP2_PRS_AI_BITS 8 +#define MVPP2_PRS_PORT_MASK 0xff +#define MVPP2_PRS_LU_MASK 0xf +#define MVPP2_PRS_TCAM_DATA_BYTE(offs) \ + (((offs) - ((offs) % 2)) * 2 + ((offs) % 2)) +#define MVPP2_PRS_TCAM_DATA_BYTE_EN(offs) \ + (((offs) * 2) - ((offs) % 2) + 2) +#define MVPP2_PRS_TCAM_AI_BYTE 16 +#define MVPP2_PRS_TCAM_PORT_BYTE 17 +#define MVPP2_PRS_TCAM_LU_BYTE 20 +#define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2) +#define MVPP2_PRS_TCAM_INV_WORD 5 +/* Tcam entries ID */ +#define MVPP2_PE_DROP_ALL 0 +#define MVPP2_PE_FIRST_FREE_TID 1 +#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31) +#define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30) +#define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29) +#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28) +#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27) +#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26) +#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19) +#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18) +#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17) +#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16) +#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15) +#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14) +#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13) +#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12) +#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11) +#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10) +#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9) +#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8) +#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7) +#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6) +#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5) +#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4) +#define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3) +#define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2) +#define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1) + +/* Sram structure + * The fields are represented by 
MVPP2_PRS_TCAM_DATA_REG(3)->(0). + */ +#define MVPP2_PRS_SRAM_RI_OFFS 0 +#define MVPP2_PRS_SRAM_RI_WORD 0 +#define MVPP2_PRS_SRAM_RI_CTRL_OFFS 32 +#define MVPP2_PRS_SRAM_RI_CTRL_WORD 1 +#define MVPP2_PRS_SRAM_RI_CTRL_BITS 32 +#define MVPP2_PRS_SRAM_SHIFT_OFFS 64 +#define MVPP2_PRS_SRAM_SHIFT_SIGN_BIT 72 +#define MVPP2_PRS_SRAM_UDF_OFFS 73 +#define MVPP2_PRS_SRAM_UDF_BITS 8 +#define MVPP2_PRS_SRAM_UDF_MASK 0xff +#define MVPP2_PRS_SRAM_UDF_SIGN_BIT 81 +#define MVPP2_PRS_SRAM_UDF_TYPE_OFFS 82 +#define MVPP2_PRS_SRAM_UDF_TYPE_MASK 0x7 +#define MVPP2_PRS_SRAM_UDF_TYPE_L3 1 +#define MVPP2_PRS_SRAM_UDF_TYPE_L4 4 +#define MVPP2_PRS_SRAM_OP_SEL_SHIFT_OFFS 85 +#define MVPP2_PRS_SRAM_OP_SEL_SHIFT_MASK 0x3 +#define MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD 1 +#define MVPP2_PRS_SRAM_OP_SEL_SHIFT_IP4_ADD 2 +#define MVPP2_PRS_SRAM_OP_SEL_SHIFT_IP6_ADD 3 +#define MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS 87 +#define MVPP2_PRS_SRAM_OP_SEL_UDF_BITS 2 +#define MVPP2_PRS_SRAM_OP_SEL_UDF_MASK 0x3 +#define MVPP2_PRS_SRAM_OP_SEL_UDF_ADD 0 +#define MVPP2_PRS_SRAM_OP_SEL_UDF_IP4_ADD 2 +#define MVPP2_PRS_SRAM_OP_SEL_UDF_IP6_ADD 3 +#define MVPP2_PRS_SRAM_OP_SEL_BASE_OFFS 89 +#define MVPP2_PRS_SRAM_AI_OFFS 90 +#define MVPP2_PRS_SRAM_AI_CTRL_OFFS 98 +#define MVPP2_PRS_SRAM_AI_CTRL_BITS 8 +#define MVPP2_PRS_SRAM_AI_MASK 0xff +#define MVPP2_PRS_SRAM_NEXT_LU_OFFS 106 +#define MVPP2_PRS_SRAM_NEXT_LU_MASK 0xf +#define MVPP2_PRS_SRAM_LU_DONE_BIT 110 +#define MVPP2_PRS_SRAM_LU_GEN_BIT 111 + +/* Sram result info bits assignment */ +#define MVPP2_PRS_RI_MAC_ME_MASK 0x1 +#define MVPP2_PRS_RI_DSA_MASK 0x2 +#define MVPP2_PRS_RI_VLAN_MASK 0xc +#define MVPP2_PRS_RI_VLAN_NONE ~(BIT(2) | BIT(3)) +#define MVPP2_PRS_RI_VLAN_SINGLE BIT(2) +#define MVPP2_PRS_RI_VLAN_DOUBLE BIT(3) +#define MVPP2_PRS_RI_VLAN_TRIPLE (BIT(2) | BIT(3)) +#define MVPP2_PRS_RI_CPU_CODE_MASK 0x70 +#define MVPP2_PRS_RI_CPU_CODE_RX_SPEC BIT(4) +#define MVPP2_PRS_RI_L2_CAST_MASK 0x600 +#define MVPP2_PRS_RI_L2_UCAST ~(BIT(9) | BIT(10)) +#define MVPP2_PRS_RI_L2_MCAST BIT(9) +#define MVPP2_PRS_RI_L2_BCAST BIT(10) +#define MVPP2_PRS_RI_PPPOE_MASK 0x800 +#define MVPP2_PRS_RI_L3_PROTO_MASK 0x7000 +#define MVPP2_PRS_RI_L3_UN ~(BIT(12) | BIT(13) | BIT(14)) +#define MVPP2_PRS_RI_L3_IP4 BIT(12) +#define MVPP2_PRS_RI_L3_IP4_OPT BIT(13) +#define MVPP2_PRS_RI_L3_IP4_OTHER (BIT(12) | BIT(13)) +#define MVPP2_PRS_RI_L3_IP6 BIT(14) +#define MVPP2_PRS_RI_L3_IP6_EXT (BIT(12) | BIT(14)) +#define MVPP2_PRS_RI_L3_ARP (BIT(13) | BIT(14)) +#define MVPP2_PRS_RI_L3_ADDR_MASK 0x18000 +#define MVPP2_PRS_RI_L3_UCAST ~(BIT(15) | BIT(16)) +#define MVPP2_PRS_RI_L3_MCAST BIT(15) +#define MVPP2_PRS_RI_L3_BCAST (BIT(15) | BIT(16)) +#define MVPP2_PRS_RI_IP_FRAG_MASK 0x20000 +#define MVPP2_PRS_RI_UDF3_MASK 0x300000 +#define MVPP2_PRS_RI_UDF3_RX_SPECIAL BIT(21) +#define MVPP2_PRS_RI_L4_PROTO_MASK 0x1c00000 +#define MVPP2_PRS_RI_L4_TCP BIT(22) +#define MVPP2_PRS_RI_L4_UDP BIT(23) +#define MVPP2_PRS_RI_L4_OTHER (BIT(22) | BIT(23)) +#define MVPP2_PRS_RI_UDF7_MASK 0x60000000 +#define MVPP2_PRS_RI_UDF7_IP6_LITE BIT(29) +#define MVPP2_PRS_RI_DROP_MASK 0x80000000 + +/* Sram additional info bits assignment */ +#define MVPP2_PRS_IPV4_DIP_AI_BIT BIT(0) +#define MVPP2_PRS_IPV6_NO_EXT_AI_BIT BIT(0) +#define MVPP2_PRS_IPV6_EXT_AI_BIT BIT(1) +#define MVPP2_PRS_IPV6_EXT_AH_AI_BIT BIT(2) +#define MVPP2_PRS_IPV6_EXT_AH_LEN_AI_BIT BIT(3) +#define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4) +#define MVPP2_PRS_SINGLE_VLAN_AI 0 +#define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7) + +/* DSA/EDSA type */ +#define MVPP2_PRS_TAGGED true +#define MVPP2_PRS_UNTAGGED 
false +#define MVPP2_PRS_EDSA true +#define MVPP2_PRS_DSA false + +/* MAC entries, shadow udf */ +enum mvpp2_prs_udf { + MVPP2_PRS_UDF_MAC_DEF, + MVPP2_PRS_UDF_MAC_RANGE, + MVPP2_PRS_UDF_L2_DEF, + MVPP2_PRS_UDF_L2_DEF_COPY, + MVPP2_PRS_UDF_L2_USER, +}; + +/* Lookup ID */ +enum mvpp2_prs_lookup { + MVPP2_PRS_LU_MH, + MVPP2_PRS_LU_MAC, + MVPP2_PRS_LU_DSA, + MVPP2_PRS_LU_VLAN, + MVPP2_PRS_LU_L2, + MVPP2_PRS_LU_PPPOE, + MVPP2_PRS_LU_IP4, + MVPP2_PRS_LU_IP6, + MVPP2_PRS_LU_FLOWS, + MVPP2_PRS_LU_LAST, +}; + +/* L3 cast enum */ +enum mvpp2_prs_l3_cast { + MVPP2_PRS_L3_UNI_CAST, + MVPP2_PRS_L3_MULTI_CAST, + MVPP2_PRS_L3_BROAD_CAST +}; + +/* Classifier constants */ +#define MVPP2_CLS_FLOWS_TBL_SIZE 512 +#define MVPP2_CLS_FLOWS_TBL_DATA_WORDS 3 +#define MVPP2_CLS_LKP_TBL_SIZE 64 + +/* BM constants */ +#define MVPP2_BM_POOLS_NUM 8 +#define MVPP2_BM_LONG_BUF_NUM 1024 +#define MVPP2_BM_SHORT_BUF_NUM 2048 +#define MVPP2_BM_POOL_SIZE_MAX (16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4) +#define MVPP2_BM_POOL_PTR_ALIGN 128 +#define MVPP2_BM_SWF_LONG_POOL(port) ((port > 2) ? 2 : port) +#define MVPP2_BM_SWF_SHORT_POOL 3 + +/* BM cookie (32 bits) definition */ +#define MVPP2_BM_COOKIE_POOL_OFFS 8 +#define MVPP2_BM_COOKIE_CPU_OFFS 24 + +/* BM short pool packet size + * These values ensure that for SWF the total number + * of bytes allocated for each buffer will be 512 + */ +#define MVPP2_BM_SHORT_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(512) + +enum mvpp2_bm_type { + MVPP2_BM_FREE, + MVPP2_BM_SWF_LONG, + MVPP2_BM_SWF_SHORT +}; + +/* Definitions */ + +/* Shared Packet Processor resources */ +struct mvpp2 { + /* Shared registers' base addresses */ + void __iomem *base; + void __iomem *lms_base; + + /* Common clocks */ + struct clk *pp_clk; + struct clk *gop_clk; + + /* List of pointers to port structures */ + struct mvpp2_port **port_list; + + /* Aggregated TXQs */ + struct mvpp2_tx_queue *aggr_txqs; + + /* BM pools */ + struct mvpp2_bm_pool *bm_pools; + + /* PRS shadow table */ + struct mvpp2_prs_shadow *prs_shadow; + /* PRS auxiliary table for double vlan entries control */ + bool *prs_double_vlans; + + /* Tclk value */ + u32 tclk; +}; + +struct mvpp2_pcpu_stats { + struct u64_stats_sync syncp; + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; +}; + +struct mvpp2_port { + u8 id; + + int irq; + + struct mvpp2 *priv; + + /* Per-port registers' base address */ + void __iomem *base; + + struct mvpp2_rx_queue **rxqs; + struct mvpp2_tx_queue **txqs; + struct net_device *dev; + + int pkt_size; + + u32 pending_cause_rx; + struct napi_struct napi; + + /* Flags */ + unsigned long flags; + + u16 tx_ring_size; + u16 rx_ring_size; + struct mvpp2_pcpu_stats __percpu *stats; + + struct phy_device *phy_dev; + phy_interface_t phy_interface; + struct device_node *phy_node; + unsigned int link; + unsigned int duplex; + unsigned int speed; + + struct mvpp2_bm_pool *pool_long; + struct mvpp2_bm_pool *pool_short; + + /* Index of first port's physical RXQ */ + u8 first_rxq; +}; + +/* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the + * layout of the transmit and reception DMA descriptors, and their + * layout is therefore defined by the hardware design + */ + +#define MVPP2_TXD_L3_OFF_SHIFT 0 +#define MVPP2_TXD_IP_HLEN_SHIFT 8 +#define MVPP2_TXD_L4_CSUM_FRAG BIT(13) +#define MVPP2_TXD_L4_CSUM_NOT BIT(14) +#define MVPP2_TXD_IP_CSUM_DISABLE BIT(15) +#define MVPP2_TXD_PADDING_DISABLE BIT(23) +#define MVPP2_TXD_L4_UDP BIT(24) +#define MVPP2_TXD_L3_IP6 BIT(26) +#define MVPP2_TXD_L_DESC BIT(28) +#define MVPP2_TXD_F_DESC BIT(29) +
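The TXD bits above combine into a descriptor's command word; an illustrative composition (not from the patch) for a single-buffer packet that opts out of checksum offload:

	/* first + last fragment of the packet, no HW checksumming */
	u32 command = MVPP2_TXD_F_DESC | MVPP2_TXD_L_DESC |
		      MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;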
+#define MVPP2_RXD_ERR_SUMMARY		BIT(15)
+#define MVPP2_RXD_ERR_CODE_MASK		(BIT(13) | BIT(14))
+#define MVPP2_RXD_ERR_CRC		0x0
+#define MVPP2_RXD_ERR_OVERRUN		BIT(13)
+#define MVPP2_RXD_ERR_RESOURCE		(BIT(13) | BIT(14))
+#define MVPP2_RXD_BM_POOL_ID_OFFS	16
+#define MVPP2_RXD_BM_POOL_ID_MASK	(BIT(16) | BIT(17) | BIT(18))
+#define MVPP2_RXD_HWF_SYNC		BIT(21)
+#define MVPP2_RXD_L4_CSUM_OK		BIT(22)
+#define MVPP2_RXD_IP4_HEADER_ERR	BIT(24)
+#define MVPP2_RXD_L4_TCP		BIT(25)
+#define MVPP2_RXD_L4_UDP		BIT(26)
+#define MVPP2_RXD_L3_IP4		BIT(28)
+#define MVPP2_RXD_L3_IP6		BIT(30)
+#define MVPP2_RXD_BUF_HDR		BIT(31)
+
+struct mvpp2_tx_desc {
+	u32 command;		/* Options used by HW for packet transmitting */
+	u8  packet_offset;	/* the offset from the buffer beginning */
+	u8  phys_txq;		/* destination queue ID */
+	u16 data_size;		/* data size of transmitted packet in bytes */
+	u32 buf_phys_addr;	/* physical addr of transmitted buffer */
+	u32 buf_cookie;		/* cookie for access to TX buffer in tx path */
+	u32 reserved1[3];	/* hw_cmd (for future use, BM, PON, PNC) */
+	u32 reserved2;		/* reserved (for future use) */
+};
+
+struct mvpp2_rx_desc {
+	u32 status;		/* info about received packet */
+	u16 reserved1;		/* parser_info (for future use, PnC) */
+	u16 data_size;		/* size of received packet in bytes */
+	u32 buf_phys_addr;	/* physical address of the buffer */
+	u32 buf_cookie;		/* cookie for access to RX buffer in rx path */
+	u16 reserved2;		/* gem_port_id (for future use, PON) */
+	u16 reserved3;		/* csum_l4 (for future use, PnC) */
+	u8  reserved4;		/* bm_qset (for future use, BM) */
+	u8  reserved5;
+	u16 reserved6;		/* classify_info (for future use, PnC) */
+	u32 reserved7;		/* flow_id (for future use, PnC) */
+	u32 reserved8;
+};
+
+/* Per-CPU Tx queue control */
+struct mvpp2_txq_pcpu {
+	int cpu;
+
+	/* Number of Tx DMA descriptors in the descriptor ring */
+	int size;
+
+	/* Number of currently used Tx DMA descriptors in the
+	 * descriptor ring
+	 */
+	int count;
+
+	/* Number of Tx DMA descriptors reserved for each CPU */
+	int reserved_num;
+
+	/* Array of transmitted skb */
+	struct sk_buff **tx_skb;
+
+	/* Index of last TX DMA descriptor that was inserted */
+	int txq_put_index;
+
+	/* Index of the TX DMA descriptor to be cleaned up */
+	int txq_get_index;
+};
+
+struct mvpp2_tx_queue {
+	/* Physical number of this Tx queue */
+	u8 id;
+
+	/* Logical number of this Tx queue */
+	u8 log_id;
+
+	/* Number of Tx DMA descriptors in the descriptor ring */
+	int size;
+
+	/* Number of currently used Tx DMA descriptors in the descriptor ring */
+	int count;
+
+	/* Per-CPU control of physical Tx queues */
+	struct mvpp2_txq_pcpu __percpu *pcpu;
+
+	/* Array of transmitted skb */
+	struct sk_buff **tx_skb;
+
+	u32 done_pkts_coal;
+
+	/* Virtual address of the Tx DMA descriptors array */
+	struct mvpp2_tx_desc *descs;
+
+	/* DMA address of the Tx DMA descriptors array */
+	dma_addr_t descs_phys;
+
+	/* Index of the last Tx DMA descriptor */
+	int last_desc;
+
+	/* Index of the next Tx DMA descriptor to process */
+	int next_desc_to_proc;
+};
+
+struct mvpp2_rx_queue {
+	/* RX queue number, in the range 0-31 for physical RXQs */
+	u8 id;
+
+	/* Num of rx descriptors in the rx descriptor ring */
+	int size;
+
+	u32 pkts_coal;
+	u32 time_coal;
+
+	/* Virtual address of the RX DMA descriptors array */
+	struct mvpp2_rx_desc *descs;
+
+	/* DMA address of the RX DMA descriptors array */
+	dma_addr_t descs_phys;
+
+	/* Index of the last RX DMA descriptor */
+	int last_desc;
+
+	/* Index of the next RX DMA descriptor to process */
+	int next_desc_to_proc;
+
+	/* ID of port to which physical RXQ is mapped */
+	int port;
+
+	/* Port's logical RXQ number to which physical RXQ is mapped */
+	int logic_rxq;
+};
+
+union mvpp2_prs_tcam_entry {
+	u32 word[MVPP2_PRS_TCAM_WORDS];
+	u8  byte[MVPP2_PRS_TCAM_WORDS * 4];
+};
+
+union mvpp2_prs_sram_entry {
+	u32 word[MVPP2_PRS_SRAM_WORDS];
+	u8  byte[MVPP2_PRS_SRAM_WORDS * 4];
+};
+
+struct mvpp2_prs_entry {
+	u32 index;
+	union mvpp2_prs_tcam_entry tcam;
+	union mvpp2_prs_sram_entry sram;
+};
+
+struct mvpp2_prs_shadow {
+	bool valid;
+	bool finish;
+
+	/* Lookup ID */
+	int lu;
+
+	/* User defined offset */
+	int udf;
+
+	/* Result info */
+	u32 ri;
+	u32 ri_mask;
+};
+
+struct mvpp2_cls_flow_entry {
+	u32 index;
+	u32 data[MVPP2_CLS_FLOWS_TBL_DATA_WORDS];
+};
+
+struct mvpp2_cls_lookup_entry {
+	u32 lkpid;
+	u32 way;
+	u32 data;
+};
+
+struct mvpp2_bm_pool {
+	/* Pool number in the range 0-7 */
+	int id;
+	enum mvpp2_bm_type type;
+
+	/* Buffer Pointers Pool External (BPPE) size */
+	int size;
+	/* Number of buffers for this pool */
+	int buf_num;
+	/* Pool buffer size */
+	int buf_size;
+	/* Packet size */
+	int pkt_size;
+
+	/* BPPE virtual base address */
+	u32 *virt_addr;
+	/* BPPE physical base address */
+	dma_addr_t phys_addr;
+
+	/* Ports using BM pool */
+	u32 port_map;
+
+	/* Occupied buffers indicator */
+	atomic_t in_use;
+	int in_use_thresh;
+
+	spinlock_t lock;
+};
+
+struct mvpp2_buff_hdr {
+	u32 next_buff_phys_addr;
+	u32 next_buff_virt_addr;
+	u16 byte_count;
+	u16 info;
+	u8  reserved1;		/* bm_qset (for future use, BM) */
+};
+
+/* Buffer header info bits */
+#define MVPP2_B_HDR_INFO_MC_ID_MASK	0xfff
+#define MVPP2_B_HDR_INFO_MC_ID(info)	((info) & MVPP2_B_HDR_INFO_MC_ID_MASK)
+#define MVPP2_B_HDR_INFO_LAST_OFFS	12
+#define MVPP2_B_HDR_INFO_LAST_MASK	BIT(12)
+#define MVPP2_B_HDR_INFO_IS_LAST(info) \
+	   ((info & MVPP2_B_HDR_INFO_LAST_MASK) >> MVPP2_B_HDR_INFO_LAST_OFFS)
+
+/* Static declarations */
+
+/* Number of RXQs used by single port */
+static int rxq_number = MVPP2_DEFAULT_RXQ;
+/* Number of TXQs used by single port */
+static int txq_number = MVPP2_MAX_TXQ;
+
+#define MVPP2_DRIVER_NAME "mvpp2"
+#define MVPP2_DRIVER_VERSION "1.0"
+
+/* Utility/helper methods */
+
+static void mvpp2_write(struct mvpp2 *priv, u32 offset, u32 data)
+{
+	writel(data, priv->base + offset);
+}
+
+static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
+{
+	return readl(priv->base + offset);
+}
+
+static void mvpp2_txq_inc_get(struct mvpp2_txq_pcpu *txq_pcpu)
+{
+	txq_pcpu->txq_get_index++;
+	if (txq_pcpu->txq_get_index == txq_pcpu->size)
+		txq_pcpu->txq_get_index = 0;
+}
+
+static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu,
+			      struct sk_buff *skb)
+{
+	txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb;
+	txq_pcpu->txq_put_index++;
+	if (txq_pcpu->txq_put_index == txq_pcpu->size)
+		txq_pcpu->txq_put_index = 0;
+}
+
+/* Get number of physical egress port */
+static inline int mvpp2_egress_port(struct mvpp2_port *port)
+{
+	return MVPP2_MAX_TCONT + port->id;
+}
+
+/* Get number of physical TXQ */
+static inline int mvpp2_txq_phys(int port, int txq)
+{
+	return (MVPP2_MAX_TCONT + port) * MVPP2_MAX_TXQ + txq;
+}
+
+/* Parser configuration routines */
+
+/* Update parser tcam and sram hw entries */
+static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe)
+{
+	int i;
+
+	if (pe->index > MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+		return -EINVAL;
+
+	/* Clear entry invalidation bit */
+	pe->tcam.word[MVPP2_PRS_TCAM_INV_WORD] &= ~MVPP2_PRS_TCAM_INV_MASK;
+
+	/* Write tcam
index - indirect access */ + mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index); + for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++) + mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam.word[i]); + + /* Write sram index - indirect access */ + mvpp2_write(priv, MVPP2_PRS_SRAM_IDX_REG, pe->index); + for (i = 0; i < MVPP2_PRS_SRAM_WORDS; i++) + mvpp2_write(priv, MVPP2_PRS_SRAM_DATA_REG(i), pe->sram.word[i]); + + return 0; +} + +/* Read tcam entry from hw */ +static int mvpp2_prs_hw_read(struct mvpp2 *priv, struct mvpp2_prs_entry *pe) +{ + int i; + + if (pe->index > MVPP2_PRS_TCAM_SRAM_SIZE - 1) + return -EINVAL; + + /* Write tcam index - indirect access */ + mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index); + + pe->tcam.word[MVPP2_PRS_TCAM_INV_WORD] = mvpp2_read(priv, + MVPP2_PRS_TCAM_DATA_REG(MVPP2_PRS_TCAM_INV_WORD)); + if (pe->tcam.word[MVPP2_PRS_TCAM_INV_WORD] & MVPP2_PRS_TCAM_INV_MASK) + return MVPP2_PRS_TCAM_ENTRY_INVALID; + + for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++) + pe->tcam.word[i] = mvpp2_read(priv, MVPP2_PRS_TCAM_DATA_REG(i)); + + /* Write sram index - indirect access */ + mvpp2_write(priv, MVPP2_PRS_SRAM_IDX_REG, pe->index); + for (i = 0; i < MVPP2_PRS_SRAM_WORDS; i++) + pe->sram.word[i] = mvpp2_read(priv, MVPP2_PRS_SRAM_DATA_REG(i)); + + return 0; +} + +/* Invalidate tcam hw entry */ +static void mvpp2_prs_hw_inv(struct mvpp2 *priv, int index) +{ + /* Write index - indirect access */ + mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, index); + mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(MVPP2_PRS_TCAM_INV_WORD), + MVPP2_PRS_TCAM_INV_MASK); +} + +/* Enable shadow table entry and set its lookup ID */ +static void mvpp2_prs_shadow_set(struct mvpp2 *priv, int index, int lu) +{ + priv->prs_shadow[index].valid = true; + priv->prs_shadow[index].lu = lu; +} + +/* Update ri fields in shadow table entry */ +static void mvpp2_prs_shadow_ri_set(struct mvpp2 *priv, int index, + unsigned int ri, unsigned int ri_mask) +{ + priv->prs_shadow[index].ri_mask = ri_mask; + priv->prs_shadow[index].ri = ri; +} + +/* Update lookup field in tcam sw entry */ +static void mvpp2_prs_tcam_lu_set(struct mvpp2_prs_entry *pe, unsigned int lu) +{ + int enable_off = MVPP2_PRS_TCAM_EN_OFFS(MVPP2_PRS_TCAM_LU_BYTE); + + pe->tcam.byte[MVPP2_PRS_TCAM_LU_BYTE] = lu; + pe->tcam.byte[enable_off] = MVPP2_PRS_LU_MASK; +} + +/* Update mask for single port in tcam sw entry */ +static void mvpp2_prs_tcam_port_set(struct mvpp2_prs_entry *pe, + unsigned int port, bool add) +{ + int enable_off = MVPP2_PRS_TCAM_EN_OFFS(MVPP2_PRS_TCAM_PORT_BYTE); + + if (add) + pe->tcam.byte[enable_off] &= ~(1 << port); + else + pe->tcam.byte[enable_off] |= 1 << port; +} + +/* Update port map in tcam sw entry */ +static void mvpp2_prs_tcam_port_map_set(struct mvpp2_prs_entry *pe, + unsigned int ports) +{ + unsigned char port_mask = MVPP2_PRS_PORT_MASK; + int enable_off = MVPP2_PRS_TCAM_EN_OFFS(MVPP2_PRS_TCAM_PORT_BYTE); + + pe->tcam.byte[MVPP2_PRS_TCAM_PORT_BYTE] = 0; + pe->tcam.byte[enable_off] &= ~port_mask; + pe->tcam.byte[enable_off] |= ~ports & MVPP2_PRS_PORT_MASK; +} + +/* Obtain port map from tcam sw entry */ +static unsigned int mvpp2_prs_tcam_port_map_get(struct mvpp2_prs_entry *pe) +{ + int enable_off = MVPP2_PRS_TCAM_EN_OFFS(MVPP2_PRS_TCAM_PORT_BYTE); + + return ~(pe->tcam.byte[enable_off]) & MVPP2_PRS_PORT_MASK; +} + +/* Set byte of data and its enable bits in tcam sw entry */ +static void mvpp2_prs_tcam_data_byte_set(struct mvpp2_prs_entry *pe, + unsigned int offs, unsigned char byte, + unsigned char enable) +{ + 
pe->tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE(offs)] = byte;
+	pe->tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE_EN(offs)] = enable;
+}
+
+/* Get byte of data and its enable bits from tcam sw entry */
+static void mvpp2_prs_tcam_data_byte_get(struct mvpp2_prs_entry *pe,
+					 unsigned int offs, unsigned char *byte,
+					 unsigned char *enable)
+{
+	*byte = pe->tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE(offs)];
+	*enable = pe->tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE_EN(offs)];
+}
+
+/* Compare tcam data bytes with a pattern */
+static bool mvpp2_prs_tcam_data_cmp(struct mvpp2_prs_entry *pe, int offs,
+				    u16 data)
+{
+	int off = MVPP2_PRS_TCAM_DATA_BYTE(offs);
+	u16 tcam_data;
+
+	/* Fold the two data bytes with byte[off] in the low half, so
+	 * callers compare against byte-swapped (swab16) values
+	 */
+	tcam_data = (pe->tcam.byte[off + 1] << 8) | pe->tcam.byte[off];
+	if (tcam_data != data)
+		return false;
+	return true;
+}
+
+/* Update ai bits in tcam sw entry */
+static void mvpp2_prs_tcam_ai_update(struct mvpp2_prs_entry *pe,
+				     unsigned int bits, unsigned int enable)
+{
+	int i, ai_idx = MVPP2_PRS_TCAM_AI_BYTE;
+
+	for (i = 0; i < MVPP2_PRS_AI_BITS; i++) {
+
+		if (!(enable & BIT(i)))
+			continue;
+
+		if (bits & BIT(i))
+			pe->tcam.byte[ai_idx] |= 1 << i;
+		else
+			pe->tcam.byte[ai_idx] &= ~(1 << i);
+	}
+
+	pe->tcam.byte[MVPP2_PRS_TCAM_EN_OFFS(ai_idx)] |= enable;
+}
+
+/* Get ai bits from tcam sw entry */
+static int mvpp2_prs_tcam_ai_get(struct mvpp2_prs_entry *pe)
+{
+	return pe->tcam.byte[MVPP2_PRS_TCAM_AI_BYTE];
+}
+
+/* Set ethertype in tcam sw entry */
+static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
+				  unsigned short ethertype)
+{
+	mvpp2_prs_tcam_data_byte_set(pe, offset + 0, ethertype >> 8, 0xff);
+	mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
+}
+
+/* Set bits in sram sw entry */
+static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
+				    int val)
+{
+	pe->sram.byte[MVPP2_BIT_TO_BYTE(bit_num)] |= (val << (bit_num % 8));
+}
+
+/* Clear bits in sram sw entry */
+static void mvpp2_prs_sram_bits_clear(struct mvpp2_prs_entry *pe, int bit_num,
+				      int val)
+{
+	pe->sram.byte[MVPP2_BIT_TO_BYTE(bit_num)] &= ~(val << (bit_num % 8));
+}
+
+/* Update ri bits in sram sw entry */
+static void mvpp2_prs_sram_ri_update(struct mvpp2_prs_entry *pe,
+				     unsigned int bits, unsigned int mask)
+{
+	unsigned int i;
+
+	for (i = 0; i < MVPP2_PRS_SRAM_RI_CTRL_BITS; i++) {
+		int ri_off = MVPP2_PRS_SRAM_RI_OFFS;
+
+		if (!(mask & BIT(i)))
+			continue;
+
+		if (bits & BIT(i))
+			mvpp2_prs_sram_bits_set(pe, ri_off + i, 1);
+		else
+			mvpp2_prs_sram_bits_clear(pe, ri_off + i, 1);
+
+		mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_RI_CTRL_OFFS + i, 1);
+	}
+}
+
+/* Obtain ri bits from sram sw entry */
+static int mvpp2_prs_sram_ri_get(struct mvpp2_prs_entry *pe)
+{
+	return pe->sram.word[MVPP2_PRS_SRAM_RI_WORD];
+}
+
+/* Update ai bits in sram sw entry */
+static void mvpp2_prs_sram_ai_update(struct mvpp2_prs_entry *pe,
+				     unsigned int bits, unsigned int mask)
+{
+	unsigned int i;
+	int ai_off = MVPP2_PRS_SRAM_AI_OFFS;
+
+	for (i = 0; i < MVPP2_PRS_SRAM_AI_CTRL_BITS; i++) {
+
+		if (!(mask & BIT(i)))
+			continue;
+
+		if (bits & BIT(i))
+			mvpp2_prs_sram_bits_set(pe, ai_off + i, 1);
+		else
+			mvpp2_prs_sram_bits_clear(pe, ai_off + i, 1);
+
+		mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_AI_CTRL_OFFS + i, 1);
+	}
+}
+
+/* Read ai bits from sram sw entry */
+static int mvpp2_prs_sram_ai_get(struct mvpp2_prs_entry *pe)
+{
+	u8 bits;
+	int ai_off = MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_AI_OFFS);
+	int ai_en_off = ai_off + 1;
+	int ai_shift = MVPP2_PRS_SRAM_AI_OFFS % 8;
+
+	bits = (pe->sram.byte[ai_off] >> ai_shift) |
+	       (pe->sram.byte[ai_en_off] << (8 - ai_shift));
+
+	return bits;
+}
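The sram helpers above treat the entry as one little-endian bit array, so an 8-bit field whose offset is not byte-aligned straddles two bytes, and mvpp2_prs_sram_ai_get stitches the two halves back together. A self-contained sketch of the same read/write stitching (the offsets are hypothetical, not the real MVPP2 register layout):

	#include <stdint.h>
	#include <stdio.h>

	/* Read an 8-bit field starting at an arbitrary bit offset in a byte
	 * array, mirroring the (byte >> shift) | (next << (8 - shift))
	 * stitching used by mvpp2_prs_sram_ai_get.
	 */
	static uint8_t read_u8_field(const uint8_t *buf, unsigned int bit_off)
	{
		unsigned int byte = bit_off / 8;
		unsigned int shift = bit_off % 8;

		return (buf[byte] >> shift) | (buf[byte + 1] << (8 - shift));
	}

	int main(void)
	{
		uint8_t sram[4] = { 0 };
		unsigned int off = 3;	/* hypothetical, not a real MVPP2 offset */
		uint8_t val = 0xa5;

		/* Write the field the way mvpp2_prs_sram_bits_set effectively
		 * does, one partial store per byte
		 */
		sram[off / 8] |= val << (off % 8);
		sram[off / 8 + 1] |= val >> (8 - off % 8);

		printf("0x%02x\n", read_u8_field(sram, off));	/* prints 0xa5 */
		return 0;
	}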
+
+/* In sram sw entry set lookup ID field of the tcam key to be used in the next
+ * lookup iteration
+ */
+static void mvpp2_prs_sram_next_lu_set(struct mvpp2_prs_entry *pe,
+				       unsigned int lu)
+{
+	int sram_next_off = MVPP2_PRS_SRAM_NEXT_LU_OFFS;
+
+	mvpp2_prs_sram_bits_clear(pe, sram_next_off,
+				  MVPP2_PRS_SRAM_NEXT_LU_MASK);
+	mvpp2_prs_sram_bits_set(pe, sram_next_off, lu);
+}
+
+/* In the sram sw entry set sign and value of the next lookup offset
+ * and the offset value generated to the classifier
+ */
+static void mvpp2_prs_sram_shift_set(struct mvpp2_prs_entry *pe, int shift,
+				     unsigned int op)
+{
+	/* Set sign */
+	if (shift < 0) {
+		mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_SHIFT_SIGN_BIT, 1);
+		shift = 0 - shift;
+	} else {
+		mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_SHIFT_SIGN_BIT, 1);
+	}
+
+	/* Set value */
+	pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_SHIFT_OFFS)] =
+		(unsigned char)shift;
+
+	/* Reset and set operation */
+	mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_SHIFT_OFFS,
+				  MVPP2_PRS_SRAM_OP_SEL_SHIFT_MASK);
+	mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_OP_SEL_SHIFT_OFFS, op);
+
+	/* Set base offset as current */
+	mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_BASE_OFFS, 1);
+}
+
+/* In the sram sw entry set sign and value of the user defined offset
+ * generated to the classifier
+ */
+static void mvpp2_prs_sram_offset_set(struct mvpp2_prs_entry *pe,
+				      unsigned int type, int offset,
+				      unsigned int op)
+{
+	/* Set sign */
+	if (offset < 0) {
+		mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_UDF_SIGN_BIT, 1);
+		offset = 0 - offset;
+	} else {
+		mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_UDF_SIGN_BIT, 1);
+	}
+
+	/* Set value */
+	mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_UDF_OFFS,
+				  MVPP2_PRS_SRAM_UDF_MASK);
+	mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_UDF_OFFS, offset);
+	pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_UDF_OFFS +
+					MVPP2_PRS_SRAM_UDF_BITS)] &=
+	      ~(MVPP2_PRS_SRAM_UDF_MASK >> (8 - (MVPP2_PRS_SRAM_UDF_OFFS % 8)));
+	pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_UDF_OFFS +
+					MVPP2_PRS_SRAM_UDF_BITS)] |=
+				(offset >> (8 - (MVPP2_PRS_SRAM_UDF_OFFS % 8)));
+
+	/* Set offset type */
+	mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_UDF_TYPE_OFFS,
+				  MVPP2_PRS_SRAM_UDF_TYPE_MASK);
+	mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_UDF_TYPE_OFFS, type);
+
+	/* Set offset operation */
+	mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS,
+				  MVPP2_PRS_SRAM_OP_SEL_UDF_MASK);
+	mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS, op);
+
+	pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS +
+					MVPP2_PRS_SRAM_OP_SEL_UDF_BITS)] &=
+					     ~(MVPP2_PRS_SRAM_OP_SEL_UDF_MASK >>
+				    (8 - (MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS % 8)));
+
+	pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS +
+					MVPP2_PRS_SRAM_OP_SEL_UDF_BITS)] |=
+			     (op >> (8 - (MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS % 8)));
+
+	/* Set base offset as current */
+	mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_BASE_OFFS, 1);
+}
+
+/* Find parser flow entry */
+static struct mvpp2_prs_entry *mvpp2_prs_flow_find(struct mvpp2 *priv, int flow)
+{
+	struct mvpp2_prs_entry *pe;
+	int tid;
+
+	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+	if (!pe)
+		return NULL;
+	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_FLOWS);
+
+	/* Go through all entries with MVPP2_PRS_LU_FLOWS */
+	for (tid = MVPP2_PRS_TCAM_SRAM_SIZE - 1; tid >= 0; tid--) {
+		u8 bits;
+
+		if (!priv->prs_shadow[tid].valid ||
+		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_FLOWS)
+			continue;
+
+		pe->index = tid;
+		mvpp2_prs_hw_read(priv, pe);
+		bits = mvpp2_prs_sram_ai_get(pe);
+
+		/* Sram stores the classification lookup ID in AI bits [5:0] */
+		if ((bits & MVPP2_PRS_FLOW_ID_MASK) == flow)
+			return pe;
+	}
+	kfree(pe);
+
+	return NULL;
+}
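Note that mvpp2_prs_sram_shift_set and mvpp2_prs_sram_offset_set encode a negative shift as a sign bit plus magnitude rather than two's complement. A compact sketch of that encoding and its inverse (the field width and bit position are made up for the example):

	#include <stdint.h>
	#include <stdio.h>

	#define SHIFT_SIGN_BIT	0x100	/* hypothetical sign-bit position */
	#define SHIFT_VAL_MASK	0x0ff	/* hypothetical 8-bit magnitude */

	/* Encode a signed shift as sign + magnitude, as the parser sram does */
	static uint16_t encode_shift(int shift)
	{
		uint16_t field = 0;

		if (shift < 0) {
			field |= SHIFT_SIGN_BIT;
			shift = -shift;
		}
		return field | (shift & SHIFT_VAL_MASK);
	}

	static int decode_shift(uint16_t field)
	{
		int val = field & SHIFT_VAL_MASK;

		return (field & SHIFT_SIGN_BIT) ? -val : val;
	}

	int main(void)
	{
		/* -18 is the "shift back to the IPv6 next-header field"
		 * value used elsewhere in this patch
		 */
		uint16_t f = encode_shift(-18);

		printf("field=0x%03x decoded=%d\n", f, decode_shift(f));
		return 0;
	}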
+/* Return first free tcam index, seeking from start to end */
+static int mvpp2_prs_tcam_first_free(struct mvpp2 *priv, unsigned char start,
+				     unsigned char end)
+{
+	int tid;
+
+	if (start > end)
+		swap(start, end);
+
+	if (end >= MVPP2_PRS_TCAM_SRAM_SIZE)
+		end = MVPP2_PRS_TCAM_SRAM_SIZE - 1;
+
+	for (tid = start; tid <= end; tid++) {
+		if (!priv->prs_shadow[tid].valid)
+			return tid;
+	}
+
+	return -EINVAL;
+}
+
+/* Enable/disable dropping all MAC DAs */
+static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add)
+{
+	struct mvpp2_prs_entry pe;
+
+	if (priv->prs_shadow[MVPP2_PE_DROP_ALL].valid) {
+		/* Entry exists - update port only */
+		pe.index = MVPP2_PE_DROP_ALL;
+		mvpp2_prs_hw_read(priv, &pe);
+	} else {
+		/* Entry doesn't exist - create new */
+		memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
+		pe.index = MVPP2_PE_DROP_ALL;
+
+		/* Non-promiscuous mode for all ports - DROP unknown packets */
+		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+					 MVPP2_PRS_RI_DROP_MASK);
+
+		mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
+		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+
+		/* Update shadow table */
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+
+		/* Mask all ports */
+		mvpp2_prs_tcam_port_map_set(&pe, 0);
+	}
+
+	/* Update port mask */
+	mvpp2_prs_tcam_port_set(&pe, port, add);
+
+	mvpp2_prs_hw_write(priv, &pe);
+}
+
+/* Set port to promiscuous mode */
+static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, bool add)
+{
+	struct mvpp2_prs_entry pe;
+
+	/* Promiscuous mode - accept unknown packets */
+
+	if (priv->prs_shadow[MVPP2_PE_MAC_PROMISCUOUS].valid) {
+		/* Entry exists - update port only */
+		pe.index = MVPP2_PE_MAC_PROMISCUOUS;
+		mvpp2_prs_hw_read(priv, &pe);
+	} else {
+		/* Entry doesn't exist - create new */
+		memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
+		pe.index = MVPP2_PE_MAC_PROMISCUOUS;
+
+		/* Continue - set next lookup */
+		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
+
+		/* Set result info bits */
+		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_UCAST,
+					 MVPP2_PRS_RI_L2_CAST_MASK);
+
+		/* Shift to ethertype */
+		mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
+					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+		/* Mask all ports */
+		mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+		/* Update shadow table */
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+	}
+
+	/* Update port mask */
+	mvpp2_prs_tcam_port_set(&pe, port, add);
+
+	mvpp2_prs_hw_write(priv, &pe);
+}
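The shared drop-all and promiscuous entries are toggled per port purely through the TCAM port mask; each caller only adds or removes its own port bit. A hypothetical caller in the style of a set_rx_mode handler (one plausible wiring shown for illustration only; the driver's actual policy lives in its rx-mode handler later in the patch):

	/* Hypothetical sketch, not part of this patch: flip a port in and
	 * out of the shared parser entries.
	 */
	static void example_set_rx_mode(struct mvpp2_port *port, bool promisc)
	{
		struct mvpp2 *priv = port->priv;
		int id = port->id;

		/* Promiscuous: accept unknown unicast, stop dropping */
		mvpp2_prs_mac_promisc_set(priv, id, promisc);
		mvpp2_prs_mac_drop_all_set(priv, id, !promisc);

		/* The multicast placeholder entries (defined just below)
		 * follow the same add/remove discipline
		 */
		mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, promisc);
		mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, promisc);
	}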
+/* Accept multicast */
+static void mvpp2_prs_mac_multi_set(struct mvpp2 *priv, int port, int index,
+				    bool add)
+{
+	struct mvpp2_prs_entry pe;
+	unsigned char da_mc;
+
+	/* Ethernet multicast address first byte is
+	 * 0x01 for IPv4 and 0x33 for IPv6
+	 */
+	da_mc = (index == MVPP2_PE_MAC_MC_ALL) ? 0x01 : 0x33;
+
+	if (priv->prs_shadow[index].valid) {
+		/* Entry exists - update port only */
+		pe.index = index;
+		mvpp2_prs_hw_read(priv, &pe);
+	} else {
+		/* Entry doesn't exist - create new */
+		memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
+		pe.index = index;
+
+		/* Continue - set next lookup */
+		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
+
+		/* Set result info bits */
+		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_MCAST,
+					 MVPP2_PRS_RI_L2_CAST_MASK);
+
+		/* Update tcam entry data first byte */
+		mvpp2_prs_tcam_data_byte_set(&pe, 0, da_mc, 0xff);
+
+		/* Shift to ethertype */
+		mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
+					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+		/* Mask all ports */
+		mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+		/* Update shadow table */
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+	}
+
+	/* Update port mask */
+	mvpp2_prs_tcam_port_set(&pe, port, add);
+
+	mvpp2_prs_hw_write(priv, &pe);
+}
+
+/* Set entry for dsa packets */
+static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
+				  bool tagged, bool extend)
+{
+	struct mvpp2_prs_entry pe;
+	int tid, shift;
+
+	if (extend) {
+		tid = tagged ? MVPP2_PE_EDSA_TAGGED : MVPP2_PE_EDSA_UNTAGGED;
+		shift = 8;
+	} else {
+		tid = tagged ? MVPP2_PE_DSA_TAGGED : MVPP2_PE_DSA_UNTAGGED;
+		shift = 4;
+	}
+
+	if (priv->prs_shadow[tid].valid) {
+		/* Entry exists - update port only */
+		pe.index = tid;
+		mvpp2_prs_hw_read(priv, &pe);
+	} else {
+		/* Entry doesn't exist - create new */
+		memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
+		pe.index = tid;
+
+		/* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag */
+		mvpp2_prs_sram_shift_set(&pe, shift,
+					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+		/* Update shadow table */
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
+
+		if (tagged) {
+			/* Set tagged bit in DSA tag */
+			mvpp2_prs_tcam_data_byte_set(&pe, 0,
+					     MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+					     MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+			/* Clear all ai bits for next iteration */
+			mvpp2_prs_sram_ai_update(&pe, 0,
+						 MVPP2_PRS_SRAM_AI_MASK);
+			/* If packet is tagged continue checking vlans */
+			mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+		} else {
+			/* Set result info bits to 'no vlans' */
+			mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
+						 MVPP2_PRS_RI_VLAN_MASK);
+			mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+		}
+
+		/* Mask all ports */
+		mvpp2_prs_tcam_port_map_set(&pe, 0);
+	}
+
+	/* Update port mask */
+	mvpp2_prs_tcam_port_set(&pe, port, add);
+
+	mvpp2_prs_hw_write(priv, &pe);
+}
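Each placeholder above is created with all ports masked (mvpp2_prs_tcam_port_map_set(&pe, 0)) and ports are then opted in individually. The encoding is inverted: the TCAM enable byte stores ~ports, so a cleared bit means the port participates. A standalone sketch of that convention (8-bit map, mirroring the helpers earlier in this file):

	#include <stdint.h>
	#include <stdio.h>

	#define PORT_MASK 0xff	/* matches the 8-bit MVPP2_PRS_PORT_MASK idea */

	/* Store a port map the way mvpp2_prs_tcam_port_map_set does:
	 * the enable byte is the bitwise NOT of the port map.
	 */
	static uint8_t port_map_to_en(uint8_t ports)
	{
		return (uint8_t)(~ports & PORT_MASK);
	}

	/* Recover the map as mvpp2_prs_tcam_port_map_get does */
	static uint8_t en_to_port_map(uint8_t en)
	{
		return (uint8_t)(~en & PORT_MASK);
	}

	/* Add/remove one port as mvpp2_prs_tcam_port_set does */
	static uint8_t port_set(uint8_t en, int port, int add)
	{
		if (add)
			return en & ~(1 << port);	/* clear bit = enabled */
		return en | (1 << port);
	}

	int main(void)
	{
		uint8_t en = port_map_to_en(0);		/* mask all ports */

		en = port_set(en, 1, 1);		/* opt port 1 in */
		printf("map=0x%02x\n", en_to_port_map(en));	/* 0x02 */
		return 0;
	}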
+/* Set entry for dsa ethertype */
+static void mvpp2_prs_dsa_tag_ethertype_set(struct mvpp2 *priv, int port,
+					    bool add, bool tagged, bool extend)
+{
+	struct mvpp2_prs_entry pe;
+	int tid, shift, port_mask;
+
+	if (extend) {
+		tid = tagged ? MVPP2_PE_ETYPE_EDSA_TAGGED :
+		      MVPP2_PE_ETYPE_EDSA_UNTAGGED;
+		port_mask = 0;
+		shift = 8;
+	} else {
+		tid = tagged ? MVPP2_PE_ETYPE_DSA_TAGGED :
+		      MVPP2_PE_ETYPE_DSA_UNTAGGED;
+		port_mask = MVPP2_PRS_PORT_MASK;
+		shift = 4;
+	}
+
+	if (priv->prs_shadow[tid].valid) {
+		/* Entry exists - update port only */
+		pe.index = tid;
+		mvpp2_prs_hw_read(priv, &pe);
+	} else {
+		/* Entry doesn't exist - create new */
+		memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
+		pe.index = tid;
+
+		/* Set ethertype */
+		mvpp2_prs_match_etype(&pe, 0, ETH_P_EDSA);
+		mvpp2_prs_match_etype(&pe, 2, 0);
+
+		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DSA_MASK,
+					 MVPP2_PRS_RI_DSA_MASK);
+		/* Shift ethertype + 2 bytes reserved + tag */
+		mvpp2_prs_sram_shift_set(&pe, 2 + MVPP2_ETH_TYPE_LEN + shift,
+					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+		/* Update shadow table */
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
+
+		if (tagged) {
+			/* Set tagged bit in DSA tag */
+			mvpp2_prs_tcam_data_byte_set(&pe,
+						 MVPP2_ETH_TYPE_LEN + 2 + 3,
+						 MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+						 MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+			/* Clear all ai bits for next iteration */
+			mvpp2_prs_sram_ai_update(&pe, 0,
+						 MVPP2_PRS_SRAM_AI_MASK);
+			/* If packet is tagged continue checking vlans */
+			mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+		} else {
+			/* Set result info bits to 'no vlans' */
+			mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
+						 MVPP2_PRS_RI_VLAN_MASK);
+			mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+		}
+		/* Mask/unmask all ports, depending on dsa type */
+		mvpp2_prs_tcam_port_map_set(&pe, port_mask);
+	}
+
+	/* Update port mask */
+	mvpp2_prs_tcam_port_set(&pe, port, add);
+
+	mvpp2_prs_hw_write(priv, &pe);
+}
+
+/* Search for existing single/triple vlan entry */
+static struct mvpp2_prs_entry *mvpp2_prs_vlan_find(struct mvpp2 *priv,
+						   unsigned short tpid, int ai)
+{
+	struct mvpp2_prs_entry *pe;
+	int tid;
+
+	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+	if (!pe)
+		return NULL;
+	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
+
+	/* Go through all entries with MVPP2_PRS_LU_VLAN */
+	for (tid = MVPP2_PE_FIRST_FREE_TID;
+	     tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+		unsigned int ri_bits, ai_bits;
+		bool match;
+
+		if (!priv->prs_shadow[tid].valid ||
+		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN)
+			continue;
+
+		pe->index = tid;
+
+		mvpp2_prs_hw_read(priv, pe);
+		match = mvpp2_prs_tcam_data_cmp(pe, 0, swab16(tpid));
+		if (!match)
+			continue;
+
+		/* Get vlan type */
+		ri_bits = mvpp2_prs_sram_ri_get(pe);
+		ri_bits &= MVPP2_PRS_RI_VLAN_MASK;
+
+		/* Get current ai value from tcam */
+		ai_bits = mvpp2_prs_tcam_ai_get(pe);
+		/* Clear double vlan bit */
+		ai_bits &= ~MVPP2_PRS_DBL_VLAN_AI_BIT;
+
+		if (ai != ai_bits)
+			continue;
+
+		if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE ||
+		    ri_bits == MVPP2_PRS_RI_VLAN_TRIPLE)
+			return pe;
+	}
+	kfree(pe);
+
+	return NULL;
+}
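mvpp2_prs_vlan_find compares against swab16(tpid) because mvpp2_prs_match_etype stores the ethertype in network order (high byte at the lower offset), while mvpp2_prs_tcam_data_cmp folds the bytes back with byte[off] in the low half. A small sketch of why the byte-swapped value is the right comparison key:

	#include <stdint.h>
	#include <stdio.h>

	/* Fold two tcam data bytes the way mvpp2_prs_tcam_data_cmp does */
	static uint16_t fold_like_data_cmp(const uint8_t *bytes)
	{
		return (uint16_t)((bytes[1] << 8) | bytes[0]);
	}

	int main(void)
	{
		uint16_t tpid = 0x8100;	/* ETH_P_8021Q */
		/* stored high byte first, as mvpp2_prs_match_etype writes it */
		uint8_t tcam[2] = { tpid >> 8, tpid & 0xff };
		uint16_t swabbed = (uint16_t)((tpid << 8) | (tpid >> 8));

		/* both print 0x0081, so the caller must pass swab16(tpid) */
		printf("folded=0x%04x swab16=0x%04x\n",
		       fold_like_data_cmp(tcam), swabbed);
		return 0;
	}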
+/* Add/update single/triple vlan entry */
+static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
+			      unsigned int port_map)
+{
+	struct mvpp2_prs_entry *pe;
+	int tid_aux, tid;
+
+	pe = mvpp2_prs_vlan_find(priv, tpid, ai);
+
+	if (!pe) {
+		/* Create new tcam entry */
+		tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_LAST_FREE_TID,
+						MVPP2_PE_FIRST_FREE_TID);
+		if (tid < 0)
+			return tid;
+
+		pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+		if (!pe)
+			return -ENOMEM;
+
+		/* Get last double vlan tid */
+		for (tid_aux = MVPP2_PE_LAST_FREE_TID;
+		     tid_aux >= MVPP2_PE_FIRST_FREE_TID; tid_aux--) {
+			unsigned int ri_bits;
+
+			if (!priv->prs_shadow[tid_aux].valid ||
+			    priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN)
+				continue;
+
+			pe->index = tid_aux;
+			mvpp2_prs_hw_read(priv, pe);
+			ri_bits = mvpp2_prs_sram_ri_get(pe);
+			if ((ri_bits & MVPP2_PRS_RI_VLAN_MASK) ==
+			    MVPP2_PRS_RI_VLAN_DOUBLE)
+				break;
+		}
+
+		if (tid <= tid_aux)
+			return -EINVAL;
+
+		memset(pe, 0, sizeof(struct mvpp2_prs_entry));
+		mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
+		pe->index = tid;
+
+		mvpp2_prs_match_etype(pe, 0, tpid);
+
+		mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
+		/* Shift 4 bytes - skip 1 vlan tag */
+		mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
+					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+		/* Clear all ai bits for next iteration */
+		mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+		if (ai == MVPP2_PRS_SINGLE_VLAN_AI) {
+			mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_SINGLE,
+						 MVPP2_PRS_RI_VLAN_MASK);
+		} else {
+			ai |= MVPP2_PRS_DBL_VLAN_AI_BIT;
+			mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_TRIPLE,
+						 MVPP2_PRS_RI_VLAN_MASK);
+		}
+		mvpp2_prs_tcam_ai_update(pe, ai, MVPP2_PRS_SRAM_AI_MASK);
+
+		mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_VLAN);
+	}
+	/* Update ports' mask */
+	mvpp2_prs_tcam_port_map_set(pe, port_map);
+
+	mvpp2_prs_hw_write(priv, pe);
+
+	kfree(pe);
+
+	return 0;
+}
+
+/* Get first free double vlan ai number */
+static int mvpp2_prs_double_vlan_ai_free_get(struct mvpp2 *priv)
+{
+	int i;
+
+	for (i = 1; i < MVPP2_PRS_DBL_VLANS_MAX; i++) {
+		if (!priv->prs_double_vlans[i])
+			return i;
+	}
+
+	return -EINVAL;
+}
+
+/* Search for existing double vlan entry */
+static struct mvpp2_prs_entry *mvpp2_prs_double_vlan_find(struct mvpp2 *priv,
+							  unsigned short tpid1,
+							  unsigned short tpid2)
+{
+	struct mvpp2_prs_entry *pe;
+	int tid;
+
+	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+	if (!pe)
+		return NULL;
+	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
+
+	/* Go through all entries with MVPP2_PRS_LU_VLAN */
+	for (tid = MVPP2_PE_FIRST_FREE_TID;
+	     tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+		unsigned int ri_mask;
+		bool match;
+
+		if (!priv->prs_shadow[tid].valid ||
+		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN)
+			continue;
+
+		pe->index = tid;
+		mvpp2_prs_hw_read(priv, pe);
+
+		match = mvpp2_prs_tcam_data_cmp(pe, 0, swab16(tpid1))
+			&& mvpp2_prs_tcam_data_cmp(pe, 4, swab16(tpid2));
+
+		if (!match)
+			continue;
+
+		ri_mask = mvpp2_prs_sram_ri_get(pe) & MVPP2_PRS_RI_VLAN_MASK;
+		if (ri_mask == MVPP2_PRS_RI_VLAN_DOUBLE)
+			return pe;
+	}
+	kfree(pe);
+
+	return NULL;
+}
+
+/* Add or update double vlan entry */
+static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
+				     unsigned short tpid2,
+				     unsigned int port_map)
+{
+	struct mvpp2_prs_entry *pe;
+	int tid_aux, tid, ai;
+
+	pe = mvpp2_prs_double_vlan_find(priv, tpid1, tpid2);
+
+	if (!pe) {
+		/* Create new tcam entry */
+		tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+						MVPP2_PE_LAST_FREE_TID);
+		if (tid < 0)
+			return tid;
+
+		pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+		if (!pe)
+			return -ENOMEM;
+
+		/* Set ai value for new double vlan entry */
+		ai = mvpp2_prs_double_vlan_ai_free_get(priv);
+		if (ai < 0)
+			return ai;
+
+		/* Get first single/triple vlan tid */
+		for (tid_aux = MVPP2_PE_FIRST_FREE_TID;
+		     tid_aux <= MVPP2_PE_LAST_FREE_TID; tid_aux++) {
+			unsigned int ri_bits;
+
+			if (!priv->prs_shadow[tid_aux].valid ||
+			    priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN)
+				continue;
+
+			pe->index = tid_aux;
+			mvpp2_prs_hw_read(priv, pe);
+			ri_bits = mvpp2_prs_sram_ri_get(pe);
+			ri_bits &= MVPP2_PRS_RI_VLAN_MASK;
+			if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE ||
+			    ri_bits == MVPP2_PRS_RI_VLAN_TRIPLE)
+				break;
+		}
+
+		if (tid >= tid_aux)
+ return -ERANGE; + + memset(pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN); + pe->index = tid; + + priv->prs_double_vlans[ai] = true; + + mvpp2_prs_match_etype(pe, 0, tpid1); + mvpp2_prs_match_etype(pe, 4, tpid2); + + mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN); + /* Shift 8 bytes - skip 2 vlan tags */ + mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE, + MVPP2_PRS_RI_VLAN_MASK); + mvpp2_prs_sram_ai_update(pe, ai | MVPP2_PRS_DBL_VLAN_AI_BIT, + MVPP2_PRS_SRAM_AI_MASK); + + mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_VLAN); + } + + /* Update ports' mask */ + mvpp2_prs_tcam_port_map_set(pe, port_map); + mvpp2_prs_hw_write(priv, pe); + + kfree(pe); + return 0; +} + +/* IPv4 header parsing for fragmentation and L4 offset */ +static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, + unsigned int ri, unsigned int ri_mask) +{ + struct mvpp2_prs_entry pe; + int tid; + + if ((proto != IPPROTO_TCP) && (proto != IPPROTO_UDP) && + (proto != IPPROTO_IGMP)) + return -EINVAL; + + /* Fragmented packet */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP4); + pe.index = tid; + + /* Set next lu to IPv4 */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); + mvpp2_prs_sram_shift_set(&pe, 12, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set L4 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, + sizeof(struct iphdr) - 4, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, + MVPP2_PRS_IPV4_DIP_AI_BIT); + mvpp2_prs_sram_ri_update(&pe, ri | MVPP2_PRS_RI_IP_FRAG_MASK, + ri_mask | MVPP2_PRS_RI_IP_FRAG_MASK); + + mvpp2_prs_tcam_data_byte_set(&pe, 5, proto, MVPP2_PRS_TCAM_PROTO_MASK); + mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4); + mvpp2_prs_hw_write(priv, &pe); + + /* Not fragmented packet */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + pe.index = tid; + /* Clear ri before updating */ + pe.sram.word[MVPP2_PRS_SRAM_RI_WORD] = 0x0; + pe.sram.word[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0; + mvpp2_prs_sram_ri_update(&pe, ri, ri_mask); + + mvpp2_prs_tcam_data_byte_set(&pe, 2, 0x00, MVPP2_PRS_TCAM_PROTO_MASK_L); + mvpp2_prs_tcam_data_byte_set(&pe, 3, 0x00, MVPP2_PRS_TCAM_PROTO_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* IPv4 L3 multicast or broadcast */ +static int mvpp2_prs_ip4_cast(struct mvpp2 *priv, unsigned short l3_cast) +{ + struct mvpp2_prs_entry pe; + int mask, tid; + + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP4); + pe.index = tid; + + switch (l3_cast) { + case MVPP2_PRS_L3_MULTI_CAST: + mvpp2_prs_tcam_data_byte_set(&pe, 0, MVPP2_PRS_IPV4_MC, + MVPP2_PRS_IPV4_MC_MASK); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_MCAST, + MVPP2_PRS_RI_L3_ADDR_MASK); + break; + case 
MVPP2_PRS_L3_BROAD_CAST: + mask = MVPP2_PRS_IPV4_BC_MASK; + mvpp2_prs_tcam_data_byte_set(&pe, 0, mask, mask); + mvpp2_prs_tcam_data_byte_set(&pe, 1, mask, mask); + mvpp2_prs_tcam_data_byte_set(&pe, 2, mask, mask); + mvpp2_prs_tcam_data_byte_set(&pe, 3, mask, mask); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_BCAST, + MVPP2_PRS_RI_L3_ADDR_MASK); + break; + default: + return -EINVAL; + } + + /* Finished: go to flowid generation */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + + mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, + MVPP2_PRS_IPV4_DIP_AI_BIT); + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* Set entries for protocols over IPv6 */ +static int mvpp2_prs_ip6_proto(struct mvpp2 *priv, unsigned short proto, + unsigned int ri, unsigned int ri_mask) +{ + struct mvpp2_prs_entry pe; + int tid; + + if ((proto != IPPROTO_TCP) && (proto != IPPROTO_UDP) && + (proto != IPPROTO_ICMPV6) && (proto != IPPROTO_IPIP)) + return -EINVAL; + + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP6); + pe.index = tid; + + /* Finished: go to flowid generation */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + mvpp2_prs_sram_ri_update(&pe, ri, ri_mask); + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, + sizeof(struct ipv6hdr) - 6, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + + mvpp2_prs_tcam_data_byte_set(&pe, 0, proto, MVPP2_PRS_TCAM_PROTO_MASK); + mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV6_NO_EXT_AI_BIT, + MVPP2_PRS_IPV6_NO_EXT_AI_BIT); + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Write HW */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP6); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* IPv6 L3 multicast entry */ +static int mvpp2_prs_ip6_cast(struct mvpp2 *priv, unsigned short l3_cast) +{ + struct mvpp2_prs_entry pe; + int tid; + + if (l3_cast != MVPP2_PRS_L3_MULTI_CAST) + return -EINVAL; + + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP6); + pe.index = tid; + + /* Finished: go to flowid generation */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP6); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_MCAST, + MVPP2_PRS_RI_L3_ADDR_MASK); + mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV6_NO_EXT_AI_BIT, + MVPP2_PRS_IPV6_NO_EXT_AI_BIT); + /* Shift back to IPv6 NH */ + mvpp2_prs_sram_shift_set(&pe, -18, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + mvpp2_prs_tcam_data_byte_set(&pe, 0, MVPP2_PRS_IPV6_MC, + MVPP2_PRS_IPV6_MC_MASK); + mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV6_NO_EXT_AI_BIT); + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP6); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* Parser per-port initialization */ +static void mvpp2_prs_hw_port_init(struct mvpp2 *priv, int port, int lu_first, + int lu_max, int offset) +{ + u32 val; + + /* 
Set lookup ID */
+	val = mvpp2_read(priv, MVPP2_PRS_INIT_LOOKUP_REG);
+	val &= ~MVPP2_PRS_PORT_LU_MASK(port);
+	val |= MVPP2_PRS_PORT_LU_VAL(port, lu_first);
+	mvpp2_write(priv, MVPP2_PRS_INIT_LOOKUP_REG, val);
+
+	/* Set maximum number of loops for packet received from port */
+	val = mvpp2_read(priv, MVPP2_PRS_MAX_LOOP_REG(port));
+	val &= ~MVPP2_PRS_MAX_LOOP_MASK(port);
+	val |= MVPP2_PRS_MAX_LOOP_VAL(port, lu_max);
+	mvpp2_write(priv, MVPP2_PRS_MAX_LOOP_REG(port), val);
+
+	/* Set initial offset for packet header extraction for the first
+	 * searching loop
+	 */
+	val = mvpp2_read(priv, MVPP2_PRS_INIT_OFFS_REG(port));
+	val &= ~MVPP2_PRS_INIT_OFF_MASK(port);
+	val |= MVPP2_PRS_INIT_OFF_VAL(port, offset);
+	mvpp2_write(priv, MVPP2_PRS_INIT_OFFS_REG(port), val);
+}
+
+/* Default flow entries initialization for all ports */
+static void mvpp2_prs_def_flow_init(struct mvpp2 *priv)
+{
+	struct mvpp2_prs_entry pe;
+	int port;
+
+	for (port = 0; port < MVPP2_MAX_PORTS; port++) {
+		memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+		pe.index = MVPP2_PE_FIRST_DEFAULT_FLOW - port;
+
+		/* Mask all ports */
+		mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+		/* Set flow ID */
+		mvpp2_prs_sram_ai_update(&pe, port, MVPP2_PRS_FLOW_ID_MASK);
+		mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_DONE_BIT, 1);
+
+		/* Update shadow table and hw entry */
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_FLOWS);
+		mvpp2_prs_hw_write(priv, &pe);
+	}
+}
+
+/* Set default entry for Marvell Header field */
+static void mvpp2_prs_mh_init(struct mvpp2 *priv)
+{
+	struct mvpp2_prs_entry pe;
+
+	memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+
+	pe.index = MVPP2_PE_MH_DEFAULT;
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MH);
+	mvpp2_prs_sram_shift_set(&pe, MVPP2_MH_SIZE,
+				 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_MAC);
+
+	/* Unmask all ports */
+	mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+	/* Update shadow table and hw entry */
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MH);
+	mvpp2_prs_hw_write(priv, &pe);
+}
+
+/* Set default entries (place holder) for promiscuous, non-promiscuous and
+ * multicast MAC addresses
+ */
+static void mvpp2_prs_mac_init(struct mvpp2 *priv)
+{
+	struct mvpp2_prs_entry pe;
+
+	memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+
+	/* Non-promiscuous mode for all ports - DROP unknown packets */
+	pe.index = MVPP2_PE_MAC_NON_PROMISCUOUS;
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
+
+	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+				 MVPP2_PRS_RI_DROP_MASK);
+	mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+
+	/* Unmask all ports */
+	mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+	/* Update shadow table and hw entry */
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+	mvpp2_prs_hw_write(priv, &pe);
+
+	/* place holders only - no ports; note the argument order is
+	 * (port, index): port 0 with the reserved entry index
+	 */
+	mvpp2_prs_mac_drop_all_set(priv, 0, false);
+	mvpp2_prs_mac_promisc_set(priv, 0, false);
+	mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_ALL, false);
+	mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_IP6, false);
+}
+
+/* Set default entries for various types of dsa packets */
+static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
+{
+	struct mvpp2_prs_entry pe;
+
+	/* None tagged EDSA entry - place holder */
+	mvpp2_prs_dsa_tag_set(priv, 0, false, MVPP2_PRS_UNTAGGED,
+			      MVPP2_PRS_EDSA);
+
+	/* Tagged EDSA entry - place holder */
+	mvpp2_prs_dsa_tag_set(priv,
0, false, MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA); + + /* None tagged DSA entry - place holder */ + mvpp2_prs_dsa_tag_set(priv, 0, false, MVPP2_PRS_UNTAGGED, + MVPP2_PRS_DSA); + + /* Tagged DSA entry - place holder */ + mvpp2_prs_dsa_tag_set(priv, 0, false, MVPP2_PRS_TAGGED, MVPP2_PRS_DSA); + + /* None tagged EDSA ethertype entry - place holder*/ + mvpp2_prs_dsa_tag_ethertype_set(priv, 0, false, + MVPP2_PRS_UNTAGGED, MVPP2_PRS_EDSA); + + /* Tagged EDSA ethertype entry - place holder*/ + mvpp2_prs_dsa_tag_ethertype_set(priv, 0, false, + MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA); + + /* None tagged DSA ethertype entry */ + mvpp2_prs_dsa_tag_ethertype_set(priv, 0, true, + MVPP2_PRS_UNTAGGED, MVPP2_PRS_DSA); + + /* Tagged DSA ethertype entry */ + mvpp2_prs_dsa_tag_ethertype_set(priv, 0, true, + MVPP2_PRS_TAGGED, MVPP2_PRS_DSA); + + /* Set default entry, in case DSA or EDSA tag not found */ + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA); + pe.index = MVPP2_PE_DSA_DEFAULT; + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN); + + /* Shift 0 bytes */ + mvpp2_prs_sram_shift_set(&pe, 0, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC); + + /* Clear all sram ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + mvpp2_prs_hw_write(priv, &pe); +} + +/* Match basic ethertypes */ +static int mvpp2_prs_etype_init(struct mvpp2 *priv) +{ + struct mvpp2_prs_entry pe; + int tid; + + /* Ethertype: PPPoE */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2); + pe.index = tid; + + mvpp2_prs_match_etype(&pe, 0, ETH_P_PPP_SES); + + mvpp2_prs_sram_shift_set(&pe, MVPP2_PPPOE_HDR_SIZE, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_PPPOE); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_PPPOE_MASK, + MVPP2_PRS_RI_PPPOE_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); + priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; + priv->prs_shadow[pe.index].finish = false; + mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_PPPOE_MASK, + MVPP2_PRS_RI_PPPOE_MASK); + mvpp2_prs_hw_write(priv, &pe); + + /* Ethertype: ARP */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2); + pe.index = tid; + + mvpp2_prs_match_etype(&pe, 0, ETH_P_ARP); + + /* Generate flow in the next iteration*/ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_ARP, + MVPP2_PRS_RI_L3_PROTO_MASK); + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, + MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); + priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; + priv->prs_shadow[pe.index].finish = true; + mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_ARP, + MVPP2_PRS_RI_L3_PROTO_MASK); + mvpp2_prs_hw_write(priv, &pe); + + /* Ethertype: LBTD */ + tid = mvpp2_prs_tcam_first_free(priv, 
MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2); + pe.index = tid; + + mvpp2_prs_match_etype(&pe, 0, MVPP2_IP_LBDT_TYPE); + + /* Generate flow in the next iteration*/ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_CPU_CODE_RX_SPEC | + MVPP2_PRS_RI_UDF3_RX_SPECIAL, + MVPP2_PRS_RI_CPU_CODE_MASK | + MVPP2_PRS_RI_UDF3_MASK); + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, + MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); + priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; + priv->prs_shadow[pe.index].finish = true; + mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_CPU_CODE_RX_SPEC | + MVPP2_PRS_RI_UDF3_RX_SPECIAL, + MVPP2_PRS_RI_CPU_CODE_MASK | + MVPP2_PRS_RI_UDF3_MASK); + mvpp2_prs_hw_write(priv, &pe); + + /* Ethertype: IPv4 without options */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2); + pe.index = tid; + + mvpp2_prs_match_etype(&pe, 0, ETH_P_IP); + mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_IPV4_HEAD | MVPP2_PRS_IPV4_IHL, + MVPP2_PRS_IPV4_HEAD_MASK | + MVPP2_PRS_IPV4_IHL_MASK); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, + MVPP2_PRS_RI_L3_PROTO_MASK); + /* Skip eth_type + 4 bytes of IP header */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, + MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); + priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; + priv->prs_shadow[pe.index].finish = false; + mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4, + MVPP2_PRS_RI_L3_PROTO_MASK); + mvpp2_prs_hw_write(priv, &pe); + + /* Ethertype: IPv4 with options */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + pe.index = tid; + + /* Clear tcam data before updating */ + pe.tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE(MVPP2_ETH_TYPE_LEN)] = 0x0; + pe.tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE_EN(MVPP2_ETH_TYPE_LEN)] = 0x0; + + mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_IPV4_HEAD, + MVPP2_PRS_IPV4_HEAD_MASK); + + /* Clear ri before updating */ + pe.sram.word[MVPP2_PRS_SRAM_RI_WORD] = 0x0; + pe.sram.word[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0; + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT, + MVPP2_PRS_RI_L3_PROTO_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); + priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; + priv->prs_shadow[pe.index].finish = false; + mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4_OPT, + MVPP2_PRS_RI_L3_PROTO_MASK); + mvpp2_prs_hw_write(priv, &pe); + + /* Ethertype: IPv6 without options */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct 
mvpp2_prs_entry));
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2);
+	pe.index = tid;
+
+	mvpp2_prs_match_etype(&pe, 0, ETH_P_IPV6);
+
+	/* Skip DIP of IPv6 header */
+	mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 8 +
+				 MVPP2_MAX_L3_ADDR_SIZE,
+				 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP6);
+	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP6,
+				 MVPP2_PRS_RI_L3_PROTO_MASK);
+	/* Set L3 offset */
+	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3,
+				  MVPP2_ETH_TYPE_LEN,
+				  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
+
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2);
+	priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF;
+	priv->prs_shadow[pe.index].finish = false;
+	mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP6,
+				MVPP2_PRS_RI_L3_PROTO_MASK);
+	mvpp2_prs_hw_write(priv, &pe);
+
+	/* Default entry for MVPP2_PRS_LU_L2 - Unknown ethertype */
+	memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2);
+	pe.index = MVPP2_PE_ETH_TYPE_UN;
+
+	/* Unmask all ports */
+	mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+	/* Generate flow in the next iteration */
+	mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_UN,
+				 MVPP2_PRS_RI_L3_PROTO_MASK);
+	/* Set L3 offset even if it's unknown L3 */
+	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3,
+				  MVPP2_ETH_TYPE_LEN,
+				  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
+
+	/* Update shadow table and hw entry */
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2);
+	priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF;
+	priv->prs_shadow[pe.index].finish = true;
+	mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_UN,
+				MVPP2_PRS_RI_L3_PROTO_MASK);
+	mvpp2_prs_hw_write(priv, &pe);
+
+	return 0;
+}
+
+/* Configure vlan entries and detect up to 2 successive VLAN tags.
+ * Possible options: + * 0x8100, 0x88A8 + * 0x8100, 0x8100 + * 0x8100 + * 0x88A8 + */ +static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) +{ + struct mvpp2_prs_entry pe; + int err; + + priv->prs_double_vlans = devm_kcalloc(&pdev->dev, sizeof(bool), + MVPP2_PRS_DBL_VLANS_MAX, + GFP_KERNEL); + if (!priv->prs_double_vlans) + return -ENOMEM; + + /* Double VLAN: 0x8100, 0x88A8 */ + err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021Q, ETH_P_8021AD, + MVPP2_PRS_PORT_MASK); + if (err) + return err; + + /* Double VLAN: 0x8100, 0x8100 */ + err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021Q, ETH_P_8021Q, + MVPP2_PRS_PORT_MASK); + if (err) + return err; + + /* Single VLAN: 0x88a8 */ + err = mvpp2_prs_vlan_add(priv, ETH_P_8021AD, MVPP2_PRS_SINGLE_VLAN_AI, + MVPP2_PRS_PORT_MASK); + if (err) + return err; + + /* Single VLAN: 0x8100 */ + err = mvpp2_prs_vlan_add(priv, ETH_P_8021Q, MVPP2_PRS_SINGLE_VLAN_AI, + MVPP2_PRS_PORT_MASK); + if (err) + return err; + + /* Set default double vlan entry */ + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN); + pe.index = MVPP2_PE_VLAN_DBL; + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + /* Clear ai for next iterations */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE, + MVPP2_PRS_RI_VLAN_MASK); + + mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_DBL_VLAN_AI_BIT, + MVPP2_PRS_DBL_VLAN_AI_BIT); + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN); + mvpp2_prs_hw_write(priv, &pe); + + /* Set default vlan none entry */ + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN); + pe.index = MVPP2_PE_VLAN_NONE; + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE, + MVPP2_PRS_RI_VLAN_MASK); + + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* Set entries for PPPoE ethertype */ +static int mvpp2_prs_pppoe_init(struct mvpp2 *priv) +{ + struct mvpp2_prs_entry pe; + int tid; + + /* IPv4 over PPPoE with options */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE); + pe.index = tid; + + mvpp2_prs_match_etype(&pe, 0, PPP_IP); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT, + MVPP2_PRS_RI_L3_PROTO_MASK); + /* Skip eth_type + 4 bytes of IP header */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, + MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); + mvpp2_prs_hw_write(priv, &pe); + + /* IPv4 over PPPoE without options */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + pe.index = tid; + + mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_IPV4_HEAD | MVPP2_PRS_IPV4_IHL, + 
MVPP2_PRS_IPV4_HEAD_MASK | + MVPP2_PRS_IPV4_IHL_MASK); + + /* Clear ri before updating */ + pe.sram.word[MVPP2_PRS_SRAM_RI_WORD] = 0x0; + pe.sram.word[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0; + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, + MVPP2_PRS_RI_L3_PROTO_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); + mvpp2_prs_hw_write(priv, &pe); + + /* IPv6 over PPPoE */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE); + pe.index = tid; + + mvpp2_prs_match_etype(&pe, 0, PPP_IPV6); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP6); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP6, + MVPP2_PRS_RI_L3_PROTO_MASK); + /* Skip eth_type + 4 bytes of IPv6 header */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, + MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); + mvpp2_prs_hw_write(priv, &pe); + + /* Non-IP over PPPoE */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; + + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE); + pe.index = tid; + + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_UN, + MVPP2_PRS_RI_L3_PROTO_MASK); + + /* Finished: go to flowid generation */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + /* Set L3 offset even if it's unknown L3 */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, + MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* Initialize entries for IPv4 */ +static int mvpp2_prs_ip4_init(struct mvpp2 *priv) +{ + struct mvpp2_prs_entry pe; + int err; + + /* Set entries for TCP, UDP and IGMP over IPv4 */ + err = mvpp2_prs_ip4_proto(priv, IPPROTO_TCP, MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_RI_L4_PROTO_MASK); + if (err) + return err; + + err = mvpp2_prs_ip4_proto(priv, IPPROTO_UDP, MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_RI_L4_PROTO_MASK); + if (err) + return err; + + err = mvpp2_prs_ip4_proto(priv, IPPROTO_IGMP, + MVPP2_PRS_RI_CPU_CODE_RX_SPEC | + MVPP2_PRS_RI_UDF3_RX_SPECIAL, + MVPP2_PRS_RI_CPU_CODE_MASK | + MVPP2_PRS_RI_UDF3_MASK); + if (err) + return err; + + /* IPv4 Broadcast */ + err = mvpp2_prs_ip4_cast(priv, MVPP2_PRS_L3_BROAD_CAST); + if (err) + return err; + + /* IPv4 Multicast */ + err = mvpp2_prs_ip4_cast(priv, MVPP2_PRS_L3_MULTI_CAST); + if (err) + return err; + + /* Default IPv4 entry for unknown protocols */ + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP4); + pe.index = MVPP2_PE_IP4_PROTO_UN; + + /* Set next lu to IPv4 */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); + mvpp2_prs_sram_shift_set(&pe, 12, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set L4 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, + sizeof(struct iphdr) - 4, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, + MVPP2_PRS_IPV4_DIP_AI_BIT); + 
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L4_OTHER, + MVPP2_PRS_RI_L4_PROTO_MASK); + + mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4); + mvpp2_prs_hw_write(priv, &pe); + + /* Default IPv4 entry for unicast address */ + memset(&pe, 0, sizeof(struct mvpp2_prs_entry)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP4); + pe.index = MVPP2_PE_IP4_ADDR_UN; + + /* Finished: go to flowid generation */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_UCAST, + MVPP2_PRS_RI_L3_ADDR_MASK); + + mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, + MVPP2_PRS_IPV4_DIP_AI_BIT); + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* Initialize entries for IPv6 */ +static int mvpp2_prs_ip6_init(struct mvpp2 *priv) +{ + struct mvpp2_prs_entry pe; + int tid, err; + + /* Set entries for TCP, UDP and ICMP over IPv6 */ + err = mvpp2_prs_ip6_proto(priv, IPPROTO_TCP, + MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_RI_L4_PROTO_MASK); + if (err) + return err; + + err = mvpp2_prs_ip6_proto(priv, IPPROTO_UDP, + MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_RI_L4_PROTO_MASK); + if (err) + return err; + + err = mvpp2_prs_ip6_proto(priv, IPPROTO_ICMPV6, + MVPP2_PRS_RI_CPU_CODE_RX_SPEC | + MVPP2_PRS_RI_UDF3_RX_SPECIAL, + MVPP2_PRS_RI_CPU_CODE_MASK | + MVPP2_PRS_RI_UDF3_MASK); + if (err) + return err; + + /* IPv4 is the last header. 
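(IPPROTO_IPIP, i.e. IPv4-in-IPv6, is the DS-Lite case.)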
This is a case similar to 6-TCP or 17-UDP */
+ /* Result Info: UDF7=1, DS lite */
+ err = mvpp2_prs_ip6_proto(priv, IPPROTO_IPIP,
+ MVPP2_PRS_RI_UDF7_IP6_LITE,
+ MVPP2_PRS_RI_UDF7_MASK);
+ if (err)
+ return err;
+
+ /* IPv6 multicast */
+ err = mvpp2_prs_ip6_cast(priv, MVPP2_PRS_L3_MULTI_CAST);
+ if (err)
+ return err;
+
+ /* Entry for checking hop limit */
+ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+ MVPP2_PE_LAST_FREE_TID);
+ if (tid < 0)
+ return tid;
+
+ memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP6);
+ pe.index = tid;
+
+ /* Finished: go to flowid generation */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+ mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_UN |
+ MVPP2_PRS_RI_DROP_MASK,
+ MVPP2_PRS_RI_L3_PROTO_MASK |
+ MVPP2_PRS_RI_DROP_MASK);
+
+ mvpp2_prs_tcam_data_byte_set(&pe, 1, 0x00, MVPP2_PRS_IPV6_HOP_MASK);
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV6_NO_EXT_AI_BIT,
+ MVPP2_PRS_IPV6_NO_EXT_AI_BIT);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ /* Default IPv6 entry for unknown protocols */
+ memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP6);
+ pe.index = MVPP2_PE_IP6_PROTO_UN;
+
+ /* Finished: go to flowid generation */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+ mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L4_OTHER,
+ MVPP2_PRS_RI_L4_PROTO_MASK);
+ /* Set L4 offset relative to our current place */
+ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4,
+ sizeof(struct ipv6hdr) - 4,
+ MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
+
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV6_NO_EXT_AI_BIT,
+ MVPP2_PRS_IPV6_NO_EXT_AI_BIT);
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ /* Default IPv6 entry for unknown ext protocols */
+ memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP6);
+ pe.index = MVPP2_PE_IP6_EXT_PROTO_UN;
+
+ /* Finished: go to flowid generation */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+ mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L4_OTHER,
+ MVPP2_PRS_RI_L4_PROTO_MASK);
+
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV6_EXT_AI_BIT,
+ MVPP2_PRS_IPV6_EXT_AI_BIT);
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ /* Default IPv6 entry for unicast address */
+ memset(&pe, 0, sizeof(struct mvpp2_prs_entry));
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP6);
+ pe.index = MVPP2_PE_IP6_ADDR_UN;
+
+ /* Finished: go to IPv6 again */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP6);
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_UCAST,
+ MVPP2_PRS_RI_L3_ADDR_MASK);
+ mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV6_NO_EXT_AI_BIT,
+ MVPP2_PRS_IPV6_NO_EXT_AI_BIT);
+ /* Shift back to IPV6 NH */
+ mvpp2_prs_sram_shift_set(&pe, -18, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV6_NO_EXT_AI_BIT);
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe,
MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP6);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ return 0;
+}
+
+/* Parser default initialization */
+static int mvpp2_prs_default_init(struct platform_device *pdev,
+ struct mvpp2 *priv)
+{
+ int err, index, i;
+
+ /* Enable tcam table */
+ mvpp2_write(priv, MVPP2_PRS_TCAM_CTRL_REG, MVPP2_PRS_TCAM_EN_MASK);
+
+ /* Clear all tcam and sram entries */
+ for (index = 0; index < MVPP2_PRS_TCAM_SRAM_SIZE; index++) {
+ mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, index);
+ for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++)
+ mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), 0);
+
+ mvpp2_write(priv, MVPP2_PRS_SRAM_IDX_REG, index);
+ for (i = 0; i < MVPP2_PRS_SRAM_WORDS; i++)
+ mvpp2_write(priv, MVPP2_PRS_SRAM_DATA_REG(i), 0);
+ }
+
+ /* Invalidate all tcam entries */
+ for (index = 0; index < MVPP2_PRS_TCAM_SRAM_SIZE; index++)
+ mvpp2_prs_hw_inv(priv, index);
+
+ priv->prs_shadow = devm_kcalloc(&pdev->dev, MVPP2_PRS_TCAM_SRAM_SIZE,
+ sizeof(struct mvpp2_prs_shadow),
+ GFP_KERNEL);
+ if (!priv->prs_shadow)
+ return -ENOMEM;
+
+ /* Always start from lookup = 0 */
+ for (index = 0; index < MVPP2_MAX_PORTS; index++)
+ mvpp2_prs_hw_port_init(priv, index, MVPP2_PRS_LU_MH,
+ MVPP2_PRS_PORT_LU_MAX, 0);
+
+ mvpp2_prs_def_flow_init(priv);
+
+ mvpp2_prs_mh_init(priv);
+
+ mvpp2_prs_mac_init(priv);
+
+ mvpp2_prs_dsa_init(priv);
+
+ err = mvpp2_prs_etype_init(priv);
+ if (err)
+ return err;
+
+ err = mvpp2_prs_vlan_init(pdev, priv);
+ if (err)
+ return err;
+
+ err = mvpp2_prs_pppoe_init(priv);
+ if (err)
+ return err;
+
+ err = mvpp2_prs_ip6_init(priv);
+ if (err)
+ return err;
+
+ err = mvpp2_prs_ip4_init(priv);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/* Compare MAC DA with tcam entry data */
+static bool mvpp2_prs_mac_range_equals(struct mvpp2_prs_entry *pe,
+ const u8 *da, unsigned char *mask)
+{
+ unsigned char tcam_byte, tcam_mask;
+ int index;
+
+ for (index = 0; index < ETH_ALEN; index++) {
+ mvpp2_prs_tcam_data_byte_get(pe, index, &tcam_byte, &tcam_mask);
+ if (tcam_mask != mask[index])
+ return false;
+
+ if ((tcam_mask & tcam_byte) != (da[index] & mask[index]))
+ return false;
+ }
+
+ return true;
+}
+
+/* Find tcam entry with matched pair of MAC DA and port map */
+static struct mvpp2_prs_entry *
+mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
+ unsigned char *mask, int udf_type)
+{
+ struct mvpp2_prs_entry *pe;
+ int tid;
+
+ pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+ if (!pe)
+ return NULL;
+ mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
+
+ /* Go through all entries with MVPP2_PRS_LU_MAC */
+ for (tid = MVPP2_PE_FIRST_FREE_TID;
+ tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+ unsigned int entry_pmap;
+
+ if (!priv->prs_shadow[tid].valid ||
+ (priv->prs_shadow[tid].lu != MVPP2_PRS_LU_MAC) ||
+ (priv->prs_shadow[tid].udf != udf_type))
+ continue;
+
+ pe->index = tid;
+ mvpp2_prs_hw_read(priv, pe);
+ entry_pmap = mvpp2_prs_tcam_port_map_get(pe);
+
+ if (mvpp2_prs_mac_range_equals(pe, da, mask) &&
+ entry_pmap == pmap)
+ return pe;
+ }
+ kfree(pe);
+
+ return NULL;
+}
+
+/* Update parser's mac da entry */
+static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
+ const u8 *da, bool add)
+{
+ struct mvpp2_prs_entry *pe;
+ unsigned int pmap, len, ri;
+ unsigned char mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ int tid;
+
+ /* Scan TCAM and see if an entry with this MAC DA already exists */
+ pe = mvpp2_prs_mac_da_range_find(priv, (1 << port), da, mask,
+ MVPP2_PRS_UDF_MAC_DEF);
+
+ /* No
such entry */
+ if (!pe) {
+ if (!add)
+ return 0;
+
+ /* Create new TCAM entry */
+ /* Find first range mac entry */
+ for (tid = MVPP2_PE_FIRST_FREE_TID;
+ tid <= MVPP2_PE_LAST_FREE_TID; tid++)
+ if (priv->prs_shadow[tid].valid &&
+ (priv->prs_shadow[tid].lu == MVPP2_PRS_LU_MAC) &&
+ (priv->prs_shadow[tid].udf ==
+ MVPP2_PRS_UDF_MAC_RANGE))
+ break;
+
+ /* Go through all entries from first to last */
+ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+ tid - 1);
+ if (tid < 0)
+ return tid;
+
+ pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+ if (!pe)
+ return -ENOMEM;
+ mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
+ pe->index = tid;
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(pe, 0);
+ }
+
+ /* Update port mask */
+ mvpp2_prs_tcam_port_set(pe, port, add);
+
+ /* Invalidate the entry if no ports are left enabled */
+ pmap = mvpp2_prs_tcam_port_map_get(pe);
+ if (pmap == 0) {
+ if (add) {
+ kfree(pe);
+ return -EINVAL;
+ }
+ mvpp2_prs_hw_inv(priv, pe->index);
+ priv->prs_shadow[pe->index].valid = false;
+ kfree(pe);
+ return 0;
+ }
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_DSA);
+
+ /* Set match on DA */
+ len = ETH_ALEN;
+ while (len--)
+ mvpp2_prs_tcam_data_byte_set(pe, len, da[len], 0xff);
+
+ /* Set result info bits */
+ if (is_broadcast_ether_addr(da))
+ ri = MVPP2_PRS_RI_L2_BCAST;
+ else if (is_multicast_ether_addr(da))
+ ri = MVPP2_PRS_RI_L2_MCAST;
+ else
+ ri = MVPP2_PRS_RI_L2_UCAST | MVPP2_PRS_RI_MAC_ME_MASK;
+
+ mvpp2_prs_sram_ri_update(pe, ri, MVPP2_PRS_RI_L2_CAST_MASK |
+ MVPP2_PRS_RI_MAC_ME_MASK);
+ mvpp2_prs_shadow_ri_set(priv, pe->index, ri, MVPP2_PRS_RI_L2_CAST_MASK |
+ MVPP2_PRS_RI_MAC_ME_MASK);
+
+ /* Shift to ethertype */
+ mvpp2_prs_sram_shift_set(pe, 2 * ETH_ALEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Update shadow table and hw entry */
+ priv->prs_shadow[pe->index].udf = MVPP2_PRS_UDF_MAC_DEF;
+ mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_MAC);
+ mvpp2_prs_hw_write(priv, pe);
+
+ kfree(pe);
+
+ return 0;
+}
+
+static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int err;
+
+ /* Remove old parser entry */
+ err = mvpp2_prs_mac_da_accept(port->priv, port->id, dev->dev_addr,
+ false);
+ if (err)
+ return err;
+
+ /* Add new parser entry */
+ err = mvpp2_prs_mac_da_accept(port->priv, port->id, da, true);
+ if (err)
+ return err;
+
+ /* Set addr in the device */
+ ether_addr_copy(dev->dev_addr, da);
+
+ return 0;
+}
+
+/* Delete all port's multicast simple (not range) entries */
+static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
+{
+ struct mvpp2_prs_entry pe;
+ int index, tid;
+
+ for (tid = MVPP2_PE_FIRST_FREE_TID;
+ tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+ unsigned char da[ETH_ALEN], da_mask[ETH_ALEN];
+
+ if (!priv->prs_shadow[tid].valid ||
+ (priv->prs_shadow[tid].lu != MVPP2_PRS_LU_MAC) ||
+ (priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF))
+ continue;
+
+ /* Only simple mac entries */
+ pe.index = tid;
+ mvpp2_prs_hw_read(priv, &pe);
+
+ /* Read mac addr from entry */
+ for (index = 0; index < ETH_ALEN; index++)
+ mvpp2_prs_tcam_data_byte_get(&pe, index, &da[index],
+ &da_mask[index]);
+
+ if (is_multicast_ether_addr(da) && !is_broadcast_ether_addr(da))
+ /* Delete this entry */
+ mvpp2_prs_mac_da_accept(priv, port, da, false);
+ }
+}
+
+static int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type)
+{
+ switch (type) {
+ case MVPP2_TAG_TYPE_EDSA:
+ /* Add port to EDSA entries */
+
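/* (EDSA is the 8-byte extended form of the 4-byte Marvell DSA tag) */
+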
mvpp2_prs_dsa_tag_set(priv, port, true,
+ MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA);
+ mvpp2_prs_dsa_tag_set(priv, port, true,
+ MVPP2_PRS_UNTAGGED, MVPP2_PRS_EDSA);
+ /* Remove port from DSA entries */
+ mvpp2_prs_dsa_tag_set(priv, port, false,
+ MVPP2_PRS_TAGGED, MVPP2_PRS_DSA);
+ mvpp2_prs_dsa_tag_set(priv, port, false,
+ MVPP2_PRS_UNTAGGED, MVPP2_PRS_DSA);
+ break;
+
+ case MVPP2_TAG_TYPE_DSA:
+ /* Add port to DSA entries */
+ mvpp2_prs_dsa_tag_set(priv, port, true,
+ MVPP2_PRS_TAGGED, MVPP2_PRS_DSA);
+ mvpp2_prs_dsa_tag_set(priv, port, true,
+ MVPP2_PRS_UNTAGGED, MVPP2_PRS_DSA);
+ /* Remove port from EDSA entries */
+ mvpp2_prs_dsa_tag_set(priv, port, false,
+ MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA);
+ mvpp2_prs_dsa_tag_set(priv, port, false,
+ MVPP2_PRS_UNTAGGED, MVPP2_PRS_EDSA);
+ break;
+
+ case MVPP2_TAG_TYPE_MH:
+ case MVPP2_TAG_TYPE_NONE:
+ /* Remove port from EDSA and DSA entries */
+ mvpp2_prs_dsa_tag_set(priv, port, false,
+ MVPP2_PRS_TAGGED, MVPP2_PRS_DSA);
+ mvpp2_prs_dsa_tag_set(priv, port, false,
+ MVPP2_PRS_UNTAGGED, MVPP2_PRS_DSA);
+ mvpp2_prs_dsa_tag_set(priv, port, false,
+ MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA);
+ mvpp2_prs_dsa_tag_set(priv, port, false,
+ MVPP2_PRS_UNTAGGED, MVPP2_PRS_EDSA);
+ break;
+
+ default:
+ if ((type < 0) || (type > MVPP2_TAG_TYPE_EDSA))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Set prs flow for the port */
+static int mvpp2_prs_def_flow(struct mvpp2_port *port)
+{
+ struct mvpp2_prs_entry *pe;
+ int tid;
+
+ pe = mvpp2_prs_flow_find(port->priv, port->id);
+
+ /* Such an entry does not exist yet */
+ if (!pe) {
+ /* Go through all entries from last to first */
+ tid = mvpp2_prs_tcam_first_free(port->priv,
+ MVPP2_PE_LAST_FREE_TID,
+ MVPP2_PE_FIRST_FREE_TID);
+ if (tid < 0)
+ return tid;
+
+ pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+ if (!pe)
+ return -ENOMEM;
+
+ mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_FLOWS);
+ pe->index = tid;
+
+ /* Set flow ID */
+ mvpp2_prs_sram_ai_update(pe, port->id, MVPP2_PRS_FLOW_ID_MASK);
+ mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_LU_DONE_BIT, 1);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(port->priv, pe->index, MVPP2_PRS_LU_FLOWS);
+ }
+
+ mvpp2_prs_tcam_port_map_set(pe, (1 << port->id));
+ mvpp2_prs_hw_write(port->priv, pe);
+ kfree(pe);
+
+ return 0;
+}
+
+/* Classifier configuration routines */
+
+/* Update classification flow table registers */
+static void mvpp2_cls_flow_write(struct mvpp2 *priv,
+ struct mvpp2_cls_flow_entry *fe)
+{
+ mvpp2_write(priv, MVPP2_CLS_FLOW_INDEX_REG, fe->index);
+ mvpp2_write(priv, MVPP2_CLS_FLOW_TBL0_REG, fe->data[0]);
+ mvpp2_write(priv, MVPP2_CLS_FLOW_TBL1_REG, fe->data[1]);
+ mvpp2_write(priv, MVPP2_CLS_FLOW_TBL2_REG, fe->data[2]);
+}
+
+/* Update classification lookup table register */
+static void mvpp2_cls_lookup_write(struct mvpp2 *priv,
+ struct mvpp2_cls_lookup_entry *le)
+{
+ u32 val;
+
+ val = (le->way << MVPP2_CLS_LKP_INDEX_WAY_OFFS) | le->lkpid;
+ mvpp2_write(priv, MVPP2_CLS_LKP_INDEX_REG, val);
+ mvpp2_write(priv, MVPP2_CLS_LKP_TBL_REG, le->data);
+}
+
+/* Classifier default initialization */
+static void mvpp2_cls_init(struct mvpp2 *priv)
+{
+ struct mvpp2_cls_lookup_entry le;
+ struct mvpp2_cls_flow_entry fe;
+ int index;
+
+ /* Enable classifier */
+ mvpp2_write(priv, MVPP2_CLS_MODE_REG, MVPP2_CLS_MODE_ACTIVE_MASK);
+
+ /* Clear classifier flow table */
+ memset(&fe.data, 0, sizeof(fe.data));
+ for (index = 0; index < MVPP2_CLS_FLOWS_TBL_SIZE; index++) {
+ fe.index = index;
+ mvpp2_cls_flow_write(priv, &fe);
+ }
+
+ /* Clear classifier lookup table */
+ le.data =
0;
+ for (index = 0; index < MVPP2_CLS_LKP_TBL_SIZE; index++) {
+ le.lkpid = index;
+ le.way = 0;
+ mvpp2_cls_lookup_write(priv, &le);
+
+ le.way = 1;
+ mvpp2_cls_lookup_write(priv, &le);
+ }
+}
+
+static void mvpp2_cls_port_config(struct mvpp2_port *port)
+{
+ struct mvpp2_cls_lookup_entry le;
+ u32 val;
+
+ /* Set way for the port */
+ val = mvpp2_read(port->priv, MVPP2_CLS_PORT_WAY_REG);
+ val &= ~MVPP2_CLS_PORT_WAY_MASK(port->id);
+ mvpp2_write(port->priv, MVPP2_CLS_PORT_WAY_REG, val);
+
+ /* Pick the entry to be accessed in lookup ID decoding table
+ * according to the way and lkpid.
+ */
+ le.lkpid = port->id;
+ le.way = 0;
+ le.data = 0;
+
+ /* Set initial CPU queue for receiving packets */
+ le.data &= ~MVPP2_CLS_LKP_TBL_RXQ_MASK;
+ le.data |= port->first_rxq;
+
+ /* Disable classification engines */
+ le.data &= ~MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK;
+
+ /* Update lookup ID table entry */
+ mvpp2_cls_lookup_write(port->priv, &le);
+}
+
+/* Set CPU queue number for oversize packets */
+static void mvpp2_cls_oversize_rxq_set(struct mvpp2_port *port)
+{
+ u32 val;
+
+ mvpp2_write(port->priv, MVPP2_CLS_OVERSIZE_RXQ_LOW_REG(port->id),
+ port->first_rxq & MVPP2_CLS_OVERSIZE_RXQ_LOW_MASK);
+
+ mvpp2_write(port->priv, MVPP2_CLS_SWFWD_P2HQ_REG(port->id),
+ (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
+
+ val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
+ val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
+ mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
+}
+
+/* Buffer Manager configuration routines */
+
+/* Create pool */
+static int mvpp2_bm_pool_create(struct platform_device *pdev,
+ struct mvpp2 *priv,
+ struct mvpp2_bm_pool *bm_pool, int size)
+{
+ int size_bytes;
+ u32 val;
+
+ size_bytes = sizeof(u32) * size;
+ bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, size_bytes,
+ &bm_pool->phys_addr,
+ GFP_KERNEL);
+ if (!bm_pool->virt_addr)
+ return -ENOMEM;
+
+ if (!IS_ALIGNED((u32)bm_pool->virt_addr, MVPP2_BM_POOL_PTR_ALIGN)) {
+ dma_free_coherent(&pdev->dev, size_bytes, bm_pool->virt_addr,
+ bm_pool->phys_addr);
+ dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n",
+ bm_pool->id, MVPP2_BM_POOL_PTR_ALIGN);
+ return -ENOMEM;
+ }
+
+ mvpp2_write(priv, MVPP2_BM_POOL_BASE_REG(bm_pool->id),
+ bm_pool->phys_addr);
+ mvpp2_write(priv, MVPP2_BM_POOL_SIZE_REG(bm_pool->id), size);
+
+ val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id));
+ val |= MVPP2_BM_START_MASK;
+ mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
+
+ bm_pool->type = MVPP2_BM_FREE;
+ bm_pool->size = size;
+ bm_pool->pkt_size = 0;
+ bm_pool->buf_num = 0;
+ atomic_set(&bm_pool->in_use, 0);
+ spin_lock_init(&bm_pool->lock);
+
+ return 0;
+}
+
+/* Set pool buffer size */
+static void mvpp2_bm_pool_bufsize_set(struct mvpp2 *priv,
+ struct mvpp2_bm_pool *bm_pool,
+ int buf_size)
+{
+ u32 val;
+
+ bm_pool->buf_size = buf_size;
+
+ val = ALIGN(buf_size, 1 << MVPP2_POOL_BUF_SIZE_OFFSET);
+ mvpp2_write(priv, MVPP2_POOL_BUF_SIZE_REG(bm_pool->id), val);
+}
+
+/* Free "num" buffers from the pool */
+static int mvpp2_bm_bufs_free(struct mvpp2 *priv,
+ struct mvpp2_bm_pool *bm_pool, int num)
+{
+ int i;
+
+ if (num >= bm_pool->buf_num)
+ /* Free all buffers from the pool */
+ num = bm_pool->buf_num;
+
+ for (i = 0; i < num; i++) {
+ u32 vaddr;
+
+ /* Get buffer virtual address (indirect access) */
+ mvpp2_read(priv, MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
+ vaddr = mvpp2_read(priv, MVPP2_BM_VIRT_ALLOC_REG);
+ if (!vaddr)
+ break;
+ dev_kfree_skb_any((struct sk_buff *)vaddr);
+ }
+
+ /* Update BM
driver with number of buffers removed from pool */
+ bm_pool->buf_num -= i;
+ return i;
+}
+
+/* Cleanup pool */
+static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
+ struct mvpp2 *priv,
+ struct mvpp2_bm_pool *bm_pool)
+{
+ int num;
+ u32 val;
+
+ num = mvpp2_bm_bufs_free(priv, bm_pool, bm_pool->buf_num);
+ if (num != bm_pool->buf_num) {
+ WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id);
+ return 0;
+ }
+
+ val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id));
+ val |= MVPP2_BM_STOP_MASK;
+ mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
+
+ dma_free_coherent(&pdev->dev, sizeof(u32) * bm_pool->size,
+ bm_pool->virt_addr,
+ bm_pool->phys_addr);
+ return 0;
+}
+
+static int mvpp2_bm_pools_init(struct platform_device *pdev,
+ struct mvpp2 *priv)
+{
+ int i, err, size;
+ struct mvpp2_bm_pool *bm_pool;
+
+ /* Create all pools with maximum size */
+ size = MVPP2_BM_POOL_SIZE_MAX;
+ for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
+ bm_pool = &priv->bm_pools[i];
+ bm_pool->id = i;
+ err = mvpp2_bm_pool_create(pdev, priv, bm_pool, size);
+ if (err)
+ goto err_unroll_pools;
+ mvpp2_bm_pool_bufsize_set(priv, bm_pool, 0);
+ }
+ return 0;
+
+err_unroll_pools:
+ dev_err(&pdev->dev, "failed to create BM pool %d, size %d\n", i, size);
+ for (i = i - 1; i >= 0; i--)
+ mvpp2_bm_pool_destroy(pdev, priv, &priv->bm_pools[i]);
+ return err;
+}
+
+static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
+{
+ int i, err;
+
+ for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
+ /* Mask all BM interrupts */
+ mvpp2_write(priv, MVPP2_BM_INTR_MASK_REG(i), 0);
+ /* Clear BM cause register */
+ mvpp2_write(priv, MVPP2_BM_INTR_CAUSE_REG(i), 0);
+ }
+
+ /* Allocate and initialize BM pools */
+ priv->bm_pools = devm_kcalloc(&pdev->dev, MVPP2_BM_POOLS_NUM,
+ sizeof(struct mvpp2_bm_pool), GFP_KERNEL);
+ if (!priv->bm_pools)
+ return -ENOMEM;
+
+ err = mvpp2_bm_pools_init(pdev, priv);
+ if (err < 0)
+ return err;
+ return 0;
+}
+
+/* Attach long pool to rxq */
+static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
+ int lrxq, int long_pool)
+{
+ u32 val;
+ int prxq;
+
+ /* Get queue physical ID */
+ prxq = port->rxqs[lrxq]->id;
+
+ val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
+ val &= ~MVPP2_RXQ_POOL_LONG_MASK;
+ val |= ((long_pool << MVPP2_RXQ_POOL_LONG_OFFS) &
+ MVPP2_RXQ_POOL_LONG_MASK);
+
+ mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
+}
+
+/* Attach short pool to rxq */
+static void mvpp2_rxq_short_pool_set(struct mvpp2_port *port,
+ int lrxq, int short_pool)
+{
+ u32 val;
+ int prxq;
+
+ /* Get queue physical ID */
+ prxq = port->rxqs[lrxq]->id;
+
+ val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
+ val &= ~MVPP2_RXQ_POOL_SHORT_MASK;
+ val |= ((short_pool << MVPP2_RXQ_POOL_SHORT_OFFS) &
+ MVPP2_RXQ_POOL_SHORT_MASK);
+
+ mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
+}
+
+/* Allocate skb for BM pool */
+static struct sk_buff *mvpp2_skb_alloc(struct mvpp2_port *port,
+ struct mvpp2_bm_pool *bm_pool,
+ dma_addr_t *buf_phys_addr,
+ gfp_t gfp_mask)
+{
+ struct sk_buff *skb;
+ dma_addr_t phys_addr;
+
+ skb = __dev_alloc_skb(bm_pool->pkt_size, gfp_mask);
+ if (!skb)
+ return NULL;
+
+ phys_addr = dma_map_single(port->dev->dev.parent, skb->head,
+ MVPP2_RX_BUF_SIZE(bm_pool->pkt_size),
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(port->dev->dev.parent, phys_addr))) {
+ dev_kfree_skb_any(skb);
+ return NULL;
+ }
+ *buf_phys_addr = phys_addr;
+
+ return skb;
+}
+
+/* Set pool number in a BM cookie */
+static inline u32
mvpp2_bm_cookie_pool_set(u32 cookie, int pool)
+{
+ u32 bm;
+
+ bm = cookie & ~(0xFF << MVPP2_BM_COOKIE_POOL_OFFS);
+ bm |= ((pool & 0xFF) << MVPP2_BM_COOKIE_POOL_OFFS);
+
+ return bm;
+}
+
+/* Get pool number from a BM cookie */
+static inline int mvpp2_bm_cookie_pool_get(u32 cookie)
+{
+ return (cookie >> MVPP2_BM_COOKIE_POOL_OFFS) & 0xFF;
+}
+
+/* Release buffer to BM */
+static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
+ u32 buf_phys_addr, u32 buf_virt_addr)
+{
+ mvpp2_write(port->priv, MVPP2_BM_VIRT_RLS_REG, buf_virt_addr);
+ mvpp2_write(port->priv, MVPP2_BM_PHY_RLS_REG(pool), buf_phys_addr);
+}
+
+/* Release multicast buffer */
+static void mvpp2_bm_pool_mc_put(struct mvpp2_port *port, int pool,
+ u32 buf_phys_addr, u32 buf_virt_addr,
+ int mc_id)
+{
+ u32 val = 0;
+
+ val |= (mc_id & MVPP2_BM_MC_ID_MASK);
+ mvpp2_write(port->priv, MVPP2_BM_MC_RLS_REG, val);
+
+ mvpp2_bm_pool_put(port, pool,
+ buf_phys_addr | MVPP2_BM_PHY_RLS_MC_BUFF_MASK,
+ buf_virt_addr);
+}
+
+/* Refill BM pool */
+static void mvpp2_pool_refill(struct mvpp2_port *port, u32 bm,
+ u32 phys_addr, u32 cookie)
+{
+ int pool = mvpp2_bm_cookie_pool_get(bm);
+
+ mvpp2_bm_pool_put(port, pool, phys_addr, cookie);
+}
+
+/* Allocate buffers for the pool */
+static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
+ struct mvpp2_bm_pool *bm_pool, int buf_num)
+{
+ struct sk_buff *skb;
+ int i, buf_size, total_size;
+ u32 bm;
+ dma_addr_t phys_addr;
+
+ buf_size = MVPP2_RX_BUF_SIZE(bm_pool->pkt_size);
+ total_size = MVPP2_RX_TOTAL_SIZE(buf_size);
+
+ if (buf_num < 0 ||
+ (buf_num + bm_pool->buf_num > bm_pool->size)) {
+ netdev_err(port->dev,
+ "cannot allocate %d buffers for pool %d\n",
+ buf_num, bm_pool->id);
+ return 0;
+ }
+
+ bm = mvpp2_bm_cookie_pool_set(0, bm_pool->id);
+ for (i = 0; i < buf_num; i++) {
+ skb = mvpp2_skb_alloc(port, bm_pool, &phys_addr, GFP_KERNEL);
+ if (!skb)
+ break;
+
+ mvpp2_pool_refill(port, bm, (u32)phys_addr, (u32)skb);
+ }
+
+ /* Update BM driver with number of buffers added to pool */
+ bm_pool->buf_num += i;
+ bm_pool->in_use_thresh = bm_pool->buf_num / 4;
+
+ netdev_dbg(port->dev,
+ "%s pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
+ bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+ bm_pool->id, bm_pool->pkt_size, buf_size, total_size);
+
+ netdev_dbg(port->dev,
+ "%s pool %d: %d of %d buffers added\n",
+ bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+ bm_pool->id, i, buf_num);
+ return i;
+}
+
+/* Notify the driver that BM pool is being used as specific type and return the
+ * pool pointer on success
+ */
+static struct mvpp2_bm_pool *
+mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
+ int pkt_size)
+{
+ unsigned long flags = 0;
+ struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
+ int num;
+
+ if (new_pool->type != MVPP2_BM_FREE && new_pool->type != type) {
+ netdev_err(port->dev, "mixing pool types is forbidden\n");
+ return NULL;
+ }
+
+ spin_lock_irqsave(&new_pool->lock, flags);
+
+ if (new_pool->type == MVPP2_BM_FREE)
+ new_pool->type = type;
+
+ /* Allocate buffers in case BM pool is used as long pool, but packet
+ * size doesn't match MTU or BM pool hasn't been used yet
+ */
+ if (((type == MVPP2_BM_SWF_LONG) && (pkt_size > new_pool->pkt_size)) ||
+ (new_pool->pkt_size == 0)) {
+ int pkts_num;
+
+ /* Set default buffer number or free all the buffers in case
+ * the pool is not empty
+ */
+ pkts_num = new_pool->buf_num;
+ if (pkts_num == 0)
+ pkts_num = type == MVPP2_BM_SWF_LONG ?
+ MVPP2_BM_LONG_BUF_NUM : + MVPP2_BM_SHORT_BUF_NUM; + else + mvpp2_bm_bufs_free(port->priv, new_pool, pkts_num); + + new_pool->pkt_size = pkt_size; + + /* Allocate buffers for this pool */ + num = mvpp2_bm_bufs_add(port, new_pool, pkts_num); + if (num != pkts_num) { + WARN(1, "pool %d: %d of %d allocated\n", + new_pool->id, num, pkts_num); + /* We need to undo the bufs_add() allocations */ + spin_unlock_irqrestore(&new_pool->lock, flags); + return NULL; + } + } + + mvpp2_bm_pool_bufsize_set(port->priv, new_pool, + MVPP2_RX_BUF_SIZE(new_pool->pkt_size)); + + spin_unlock_irqrestore(&new_pool->lock, flags); + + return new_pool; +} + +/* Initialize pools for swf */ +static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) +{ + unsigned long flags = 0; + int rxq; + + if (!port->pool_long) { + port->pool_long = + mvpp2_bm_pool_use(port, MVPP2_BM_SWF_LONG_POOL(port->id), + MVPP2_BM_SWF_LONG, + port->pkt_size); + if (!port->pool_long) + return -ENOMEM; + + spin_lock_irqsave(&port->pool_long->lock, flags); + port->pool_long->port_map |= (1 << port->id); + spin_unlock_irqrestore(&port->pool_long->lock, flags); + + for (rxq = 0; rxq < rxq_number; rxq++) + mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id); + } + + if (!port->pool_short) { + port->pool_short = + mvpp2_bm_pool_use(port, MVPP2_BM_SWF_SHORT_POOL, + MVPP2_BM_SWF_SHORT, + MVPP2_BM_SHORT_PKT_SIZE); + if (!port->pool_short) + return -ENOMEM; + + spin_lock_irqsave(&port->pool_short->lock, flags); + port->pool_short->port_map |= (1 << port->id); + spin_unlock_irqrestore(&port->pool_short->lock, flags); + + for (rxq = 0; rxq < rxq_number; rxq++) + mvpp2_rxq_short_pool_set(port, rxq, + port->pool_short->id); + } + + return 0; +} + +static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu) +{ + struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_bm_pool *port_pool = port->pool_long; + int num, pkts_num = port_pool->buf_num; + int pkt_size = MVPP2_RX_PKT_SIZE(mtu); + + /* Update BM pool with new buffer size */ + num = mvpp2_bm_bufs_free(port->priv, port_pool, pkts_num); + if (num != pkts_num) { + WARN(1, "cannot free all buffers in pool %d\n", port_pool->id); + return -EIO; + } + + port_pool->pkt_size = pkt_size; + num = mvpp2_bm_bufs_add(port, port_pool, pkts_num); + if (num != pkts_num) { + WARN(1, "pool %d: %d of %d allocated\n", + port_pool->id, num, pkts_num); + return -EIO; + } + + mvpp2_bm_pool_bufsize_set(port->priv, port_pool, + MVPP2_RX_BUF_SIZE(port_pool->pkt_size)); + dev->mtu = mtu; + netdev_update_features(dev); + return 0; +} + +static inline void mvpp2_interrupts_enable(struct mvpp2_port *port) +{ + int cpu, cpu_mask = 0; + + for_each_present_cpu(cpu) + cpu_mask |= 1 << cpu; + mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id), + MVPP2_ISR_ENABLE_INTERRUPT(cpu_mask)); +} + +static inline void mvpp2_interrupts_disable(struct mvpp2_port *port) +{ + int cpu, cpu_mask = 0; + + for_each_present_cpu(cpu) + cpu_mask |= 1 << cpu; + mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id), + MVPP2_ISR_DISABLE_INTERRUPT(cpu_mask)); +} + +/* Mask the current CPU's Rx/Tx interrupts */ +static void mvpp2_interrupts_mask(void *arg) +{ + struct mvpp2_port *port = arg; + + mvpp2_write(port->priv, MVPP2_ISR_RX_TX_MASK_REG(port->id), 0); +} + +/* Unmask the current CPU's Rx/Tx interrupts */ +static void mvpp2_interrupts_unmask(void *arg) +{ + struct mvpp2_port *port = arg; + + mvpp2_write(port->priv, MVPP2_ISR_RX_TX_MASK_REG(port->id), + (MVPP2_CAUSE_MISC_SUM_MASK | + MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK | + 
MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK)); +} + +/* Port configuration routines */ + +static void mvpp2_port_mii_set(struct mvpp2_port *port) +{ + u32 reg, val = 0; + + if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) + val = MVPP2_GMAC_PCS_ENABLE_MASK | + MVPP2_GMAC_INBAND_AN_MASK; + else if (port->phy_interface == PHY_INTERFACE_MODE_RGMII) + val = MVPP2_GMAC_PORT_RGMII_MASK; + + reg = readl(port->base + MVPP2_GMAC_CTRL_2_REG); + writel(reg | val, port->base + MVPP2_GMAC_CTRL_2_REG); +} + +static void mvpp2_port_enable(struct mvpp2_port *port) +{ + u32 val; + + val = readl(port->base + MVPP2_GMAC_CTRL_0_REG); + val |= MVPP2_GMAC_PORT_EN_MASK; + val |= MVPP2_GMAC_MIB_CNTR_EN_MASK; + writel(val, port->base + MVPP2_GMAC_CTRL_0_REG); +} + +static void mvpp2_port_disable(struct mvpp2_port *port) +{ + u32 val; + + val = readl(port->base + MVPP2_GMAC_CTRL_0_REG); + val &= ~(MVPP2_GMAC_PORT_EN_MASK); + writel(val, port->base + MVPP2_GMAC_CTRL_0_REG); +} + +/* Set IEEE 802.3x Flow Control Xon Packet Transmission Mode */ +static void mvpp2_port_periodic_xon_disable(struct mvpp2_port *port) +{ + u32 val; + + val = readl(port->base + MVPP2_GMAC_CTRL_1_REG) & + ~MVPP2_GMAC_PERIODIC_XON_EN_MASK; + writel(val, port->base + MVPP2_GMAC_CTRL_1_REG); +} + +/* Configure loopback port */ +static void mvpp2_port_loopback_set(struct mvpp2_port *port) +{ + u32 val; + + val = readl(port->base + MVPP2_GMAC_CTRL_1_REG); + + if (port->speed == 1000) + val |= MVPP2_GMAC_GMII_LB_EN_MASK; + else + val &= ~MVPP2_GMAC_GMII_LB_EN_MASK; + + if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) + val |= MVPP2_GMAC_PCS_LB_EN_MASK; + else + val &= ~MVPP2_GMAC_PCS_LB_EN_MASK; + + writel(val, port->base + MVPP2_GMAC_CTRL_1_REG); +} + +static void mvpp2_port_reset(struct mvpp2_port *port) +{ + u32 val; + + val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) & + ~MVPP2_GMAC_PORT_RESET_MASK; + writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); + + while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) & + MVPP2_GMAC_PORT_RESET_MASK) + continue; +} + +/* Change maximum receive size of the port */ +static inline void mvpp2_gmac_max_rx_size_set(struct mvpp2_port *port) +{ + u32 val; + + val = readl(port->base + MVPP2_GMAC_CTRL_0_REG); + val &= ~MVPP2_GMAC_MAX_RX_SIZE_MASK; + val |= (((port->pkt_size - MVPP2_MH_SIZE) / 2) << + MVPP2_GMAC_MAX_RX_SIZE_OFFS); + writel(val, port->base + MVPP2_GMAC_CTRL_0_REG); +} + +/* Set defaults to the MVPP2 port */ +static void mvpp2_defaults_set(struct mvpp2_port *port) +{ + int tx_port_num, val, queue, ptxq, lrxq; + + /* Configure port to loopback if needed */ + if (port->flags & MVPP2_F_LOOPBACK) + mvpp2_port_loopback_set(port); + + /* Update TX FIFO MIN Threshold */ + val = readl(port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG); + val &= ~MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK; + /* Min. 
TX threshold must be less than minimal packet length */
+ val |= MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(64 - 4 - 2);
+ writel(val, port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
+
+ /* Disable Legacy WRR, Disable EJP, Release from reset */
+ tx_port_num = mvpp2_egress_port(port);
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_PORT_INDEX_REG,
+ tx_port_num);
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_CMD_1_REG, 0);
+
+ /* Close bandwidth for all queues */
+ for (queue = 0; queue < MVPP2_MAX_TXQ; queue++) {
+ ptxq = mvpp2_txq_phys(port->id, queue);
+ mvpp2_write(port->priv,
+ MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(ptxq), 0);
+ }
+
+ /* Set refill period to 1 usec, refill tokens
+ * and bucket size to maximum
+ */
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_PERIOD_REG,
+ port->priv->tclk / USEC_PER_SEC);
+ val = mvpp2_read(port->priv, MVPP2_TXP_SCHED_REFILL_REG);
+ val &= ~MVPP2_TXP_REFILL_PERIOD_ALL_MASK;
+ val |= MVPP2_TXP_REFILL_PERIOD_MASK(1);
+ val |= MVPP2_TXP_REFILL_TOKENS_ALL_MASK;
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_REFILL_REG, val);
+ val = MVPP2_TXP_TOKEN_SIZE_MAX;
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_TOKEN_SIZE_REG, val);
+
+ /* Set MaximumLowLatencyPacketSize value to 256 */
+ mvpp2_write(port->priv, MVPP2_RX_CTRL_REG(port->id),
+ MVPP2_RX_USE_PSEUDO_FOR_CSUM_MASK |
+ MVPP2_RX_LOW_LATENCY_PKT_SIZE(256));
+
+ /* Enable Rx cache snoop */
+ for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+ queue = port->rxqs[lrxq]->id;
+ val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
+ val |= MVPP2_SNOOP_PKT_SIZE_MASK |
+ MVPP2_SNOOP_BUF_HDR_MASK;
+ mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(queue), val);
+ }
+
+ /* By default, mask all interrupts to all present CPUs */
+ mvpp2_interrupts_disable(port);
+}
+
+/* Enable/disable receiving packets */
+static void mvpp2_ingress_enable(struct mvpp2_port *port)
+{
+ u32 val;
+ int lrxq, queue;
+
+ for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+ queue = port->rxqs[lrxq]->id;
+ val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
+ val &= ~MVPP2_RXQ_DISABLE_MASK;
+ mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(queue), val);
+ }
+}
+
+static void mvpp2_ingress_disable(struct mvpp2_port *port)
+{
+ u32 val;
+ int lrxq, queue;
+
+ for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+ queue = port->rxqs[lrxq]->id;
+ val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
+ val |= MVPP2_RXQ_DISABLE_MASK;
+ mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(queue), val);
+ }
+}
+
+/* Enable transmit via physical egress queue
+ * - HW starts to take descriptors from DRAM
+ */
+static void mvpp2_egress_enable(struct mvpp2_port *port)
+{
+ u32 qmap;
+ int queue;
+ int tx_port_num = mvpp2_egress_port(port);
+
+ /* Enable all initialized TXs.
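Only queues with allocated descriptor memory get a bit in the enable map below.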
*/
+ qmap = 0;
+ for (queue = 0; queue < txq_number; queue++) {
+ struct mvpp2_tx_queue *txq = port->txqs[queue];
+
+ if (txq->descs != NULL)
+ qmap |= (1 << queue);
+ }
+
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_PORT_INDEX_REG, tx_port_num);
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_Q_CMD_REG, qmap);
+}
+
+/* Disable transmit via physical egress queue
+ * - HW doesn't take descriptors from DRAM
+ */
+static void mvpp2_egress_disable(struct mvpp2_port *port)
+{
+ u32 reg_data;
+ int delay;
+ int tx_port_num = mvpp2_egress_port(port);
+
+ /* Issue stop command for active channels only */
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_PORT_INDEX_REG, tx_port_num);
+ reg_data = (mvpp2_read(port->priv, MVPP2_TXP_SCHED_Q_CMD_REG)) &
+ MVPP2_TXP_SCHED_ENQ_MASK;
+ if (reg_data != 0)
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_Q_CMD_REG,
+ (reg_data << MVPP2_TXP_SCHED_DISQ_OFFSET));
+
+ /* Wait for all Tx activity to terminate. */
+ delay = 0;
+ do {
+ if (delay >= MVPP2_TX_DISABLE_TIMEOUT_MSEC) {
+ netdev_warn(port->dev,
+ "Tx stop timed out, status=0x%08x\n",
+ reg_data);
+ break;
+ }
+ mdelay(1);
+ delay++;
+
+ /* Check port TX Command register to see that all
+ * Tx queues are stopped
+ */
+ reg_data = mvpp2_read(port->priv, MVPP2_TXP_SCHED_Q_CMD_REG);
+ } while (reg_data & MVPP2_TXP_SCHED_ENQ_MASK);
+}
+
+/* Rx descriptors helper methods */
+
+/* Get number of Rx descriptors occupied by received packets */
+static inline int
+mvpp2_rxq_received(struct mvpp2_port *port, int rxq_id)
+{
+ u32 val = mvpp2_read(port->priv, MVPP2_RXQ_STATUS_REG(rxq_id));
+
+ return val & MVPP2_RXQ_OCCUPIED_MASK;
+}
+
+/* Update Rx queue status with the number of occupied and available
+ * Rx descriptor slots.
+ */
+static inline void
+mvpp2_rxq_status_update(struct mvpp2_port *port, int rxq_id,
+ int used_count, int free_count)
+{
+ /* Decrement the number of used descriptors and increment
+ * the number of free descriptors.
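+ * Both counts go out in a single register write: the used count in the
+ * low bits and the free count shifted by MVPP2_RXQ_NUM_NEW_OFFSET.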
+ */
+ u32 val = used_count | (free_count << MVPP2_RXQ_NUM_NEW_OFFSET);
+
+ mvpp2_write(port->priv, MVPP2_RXQ_STATUS_UPDATE_REG(rxq_id), val);
+}
+
+/* Get pointer to next RX descriptor to be processed by SW */
+static inline struct mvpp2_rx_desc *
+mvpp2_rxq_next_desc_get(struct mvpp2_rx_queue *rxq)
+{
+ int rx_desc = rxq->next_desc_to_proc;
+
+ rxq->next_desc_to_proc = MVPP2_QUEUE_NEXT_DESC(rxq, rx_desc);
+ prefetch(rxq->descs + rxq->next_desc_to_proc);
+ return rxq->descs + rx_desc;
+}
+
+/* Set rx queue offset */
+static void mvpp2_rxq_offset_set(struct mvpp2_port *port,
+ int prxq, int offset)
+{
+ u32 val;
+
+ /* Convert offset from bytes to units of 32 bytes */
+ offset = offset >> 5;
+
+ val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
+ val &= ~MVPP2_RXQ_PACKET_OFFSET_MASK;
+
+ /* Offset is in units of 32 bytes */
+ val |= ((offset << MVPP2_RXQ_PACKET_OFFSET_OFFS) &
+ MVPP2_RXQ_PACKET_OFFSET_MASK);
+
+ mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
+}
+
+/* Obtain BM cookie information from descriptor */
+static u32 mvpp2_bm_cookie_build(struct mvpp2_rx_desc *rx_desc)
+{
+ int pool = (rx_desc->status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+ MVPP2_RXD_BM_POOL_ID_OFFS;
+ int cpu = smp_processor_id();
+
+ return ((pool & 0xFF) << MVPP2_BM_COOKIE_POOL_OFFS) |
+ ((cpu & 0xFF) << MVPP2_BM_COOKIE_CPU_OFFS);
+}
+
+/* Tx descriptors helper methods */
+
+/* Get number of Tx descriptors waiting to be transmitted by HW */
+static int mvpp2_txq_pend_desc_num_get(struct mvpp2_port *port,
+ struct mvpp2_tx_queue *txq)
+{
+ u32 val;
+
+ mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
+ val = mvpp2_read(port->priv, MVPP2_TXQ_PENDING_REG);
+
+ return val & MVPP2_TXQ_PENDING_MASK;
+}
+
+/* Get pointer to next Tx descriptor to be processed (send) by HW */
+static struct mvpp2_tx_desc *
+mvpp2_txq_next_desc_get(struct mvpp2_tx_queue *txq)
+{
+ int tx_desc = txq->next_desc_to_proc;
+
+ txq->next_desc_to_proc = MVPP2_QUEUE_NEXT_DESC(txq, tx_desc);
+ return txq->descs + tx_desc;
+}
+
+/* Update HW with number of aggregated Tx descriptors to be sent */
+static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending)
+{
+ /* aggregated access - relevant TXQ number is written in TX desc */
+ mvpp2_write(port->priv, MVPP2_AGGR_TXQ_UPDATE_REG, pending);
+}
+
+/* Check if there are enough free descriptors in aggregated txq.
+ * If not, update the number of occupied descriptors and repeat the check.
+ */
+static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
+ struct mvpp2_tx_queue *aggr_txq, int num)
+{
+ if ((aggr_txq->count + num) > aggr_txq->size) {
+ /* Update number of occupied aggregated Tx descriptors */
+ int cpu = smp_processor_id();
+ u32 val = mvpp2_read(priv, MVPP2_AGGR_TXQ_STATUS_REG(cpu));
+
+ aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK;
+ }
+
+ if ((aggr_txq->count + num) > aggr_txq->size)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Reserved Tx descriptors allocation request */
+static int mvpp2_txq_alloc_reserved_desc(struct mvpp2 *priv,
+ struct mvpp2_tx_queue *txq, int num)
+{
+ u32 val;
+
+ val = (txq->id << MVPP2_TXQ_RSVD_REQ_Q_OFFSET) | num;
+ mvpp2_write(priv, MVPP2_TXQ_RSVD_REQ_REG, val);
+
+ val = mvpp2_read(priv, MVPP2_TXQ_RSVD_RSLT_REG);
+
+ return val & MVPP2_TXQ_RSVD_RSLT_MASK;
+}
+
+/* Check if there are enough reserved descriptors for transmission.
+ * If not, request a chunk of reserved descriptors and check again.
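+ * The chunk request may be satisfied only partially, so the caller
+ * still fails with -ENOMEM if the per-CPU reservation stays short.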
+ */
+static int mvpp2_txq_reserved_desc_num_proc(struct mvpp2 *priv,
+ struct mvpp2_tx_queue *txq,
+ struct mvpp2_txq_pcpu *txq_pcpu,
+ int num)
+{
+ int req, cpu, desc_count;
+
+ if (txq_pcpu->reserved_num >= num)
+ return 0;
+
+ /* Not enough descriptors reserved! Update the reserved descriptor
+ * count and check again.
+ */
+
+ desc_count = 0;
+ /* Compute total of used descriptors */
+ for_each_present_cpu(cpu) {
+ struct mvpp2_txq_pcpu *txq_pcpu_aux;
+
+ txq_pcpu_aux = per_cpu_ptr(txq->pcpu, cpu);
+ desc_count += txq_pcpu_aux->count;
+ desc_count += txq_pcpu_aux->reserved_num;
+ }
+
+ req = max(MVPP2_CPU_DESC_CHUNK, num - txq_pcpu->reserved_num);
+ desc_count += req;
+
+ if (desc_count >
+ (txq->size - (num_present_cpus() * MVPP2_CPU_DESC_CHUNK)))
+ return -ENOMEM;
+
+ txq_pcpu->reserved_num += mvpp2_txq_alloc_reserved_desc(priv, txq, req);
+
+ /* OK, the descriptor count has been updated: check again. */
+ if (txq_pcpu->reserved_num < num)
+ return -ENOMEM;
+ return 0;
+}
+
+/* Release the last allocated Tx descriptor. Useful to handle DMA
+ * mapping failures in the Tx path.
+ */
+static void mvpp2_txq_desc_put(struct mvpp2_tx_queue *txq)
+{
+ if (txq->next_desc_to_proc == 0)
+ txq->next_desc_to_proc = txq->last_desc - 1;
+ else
+ txq->next_desc_to_proc--;
+}
+
+/* Set Tx descriptors fields relevant for CSUM calculation */
+static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto,
+ int ip_hdr_len, int l4_proto)
+{
+ u32 command;
+
+ /* fields: L3_offset, IP_hdrlen, L3_type, G_IPv4_chk,
+ * G_L4_chk, L4_type required only for checksum calculation
+ */
+ command = (l3_offs << MVPP2_TXD_L3_OFF_SHIFT);
+ command |= (ip_hdr_len << MVPP2_TXD_IP_HLEN_SHIFT);
+ command |= MVPP2_TXD_IP_CSUM_DISABLE;
+
+ if (l3_proto == swab16(ETH_P_IP)) {
+ command &= ~MVPP2_TXD_IP_CSUM_DISABLE; /* enable IPv4 csum */
+ command &= ~MVPP2_TXD_L3_IP6; /* enable IPv4 */
+ } else {
+ command |= MVPP2_TXD_L3_IP6; /* enable IPv6 */
+ }
+
+ if (l4_proto == IPPROTO_TCP) {
+ command &= ~MVPP2_TXD_L4_UDP; /* enable TCP */
+ command &= ~MVPP2_TXD_L4_CSUM_FRAG; /* generate L4 csum */
+ } else if (l4_proto == IPPROTO_UDP) {
+ command |= MVPP2_TXD_L4_UDP; /* enable UDP */
+ command &= ~MVPP2_TXD_L4_CSUM_FRAG; /* generate L4 csum */
+ } else {
+ command |= MVPP2_TXD_L4_CSUM_NOT;
+ }
+
+ return command;
+}
+
+/* Get number of sent descriptors and decrement counter.
+ * The number of sent descriptors is returned.
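+ * Reading MVPP2_TXQ_SENT_REG clears the HW counter, so each call only
+ * reports descriptors completed since the previous read.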
+ * Per-CPU access
+ */
+static inline int mvpp2_txq_sent_desc_proc(struct mvpp2_port *port,
+ struct mvpp2_tx_queue *txq)
+{
+ u32 val;
+
+ /* Reading status reg resets transmitted descriptor counter */
+ val = mvpp2_read(port->priv, MVPP2_TXQ_SENT_REG(txq->id));
+
+ return (val & MVPP2_TRANSMITTED_COUNT_MASK) >>
+ MVPP2_TRANSMITTED_COUNT_OFFSET;
+}
+
+static void mvpp2_txq_sent_counter_clear(void *arg)
+{
+ struct mvpp2_port *port = arg;
+ int queue;
+
+ for (queue = 0; queue < txq_number; queue++) {
+ int id = port->txqs[queue]->id;
+
+ mvpp2_read(port->priv, MVPP2_TXQ_SENT_REG(id));
+ }
+}
+
+/* Set max sizes for Tx queues */
+static void mvpp2_txp_max_tx_size_set(struct mvpp2_port *port)
+{
+ u32 val, size, mtu;
+ int txq, tx_port_num;
+
+ mtu = port->pkt_size * 8;
+ if (mtu > MVPP2_TXP_MTU_MAX)
+ mtu = MVPP2_TXP_MTU_MAX;
+
+ /* WA for wrong Token bucket update: Set MTU value = 3*real MTU value */
+ mtu = 3 * mtu;
+
+ /* Indirect access to registers */
+ tx_port_num = mvpp2_egress_port(port);
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_PORT_INDEX_REG, tx_port_num);
+
+ /* Set MTU */
+ val = mvpp2_read(port->priv, MVPP2_TXP_SCHED_MTU_REG);
+ val &= ~MVPP2_TXP_MTU_MAX;
+ val |= mtu;
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_MTU_REG, val);
+
+ /* TXP token size and all TXQs token size must be larger than MTU */
+ val = mvpp2_read(port->priv, MVPP2_TXP_SCHED_TOKEN_SIZE_REG);
+ size = val & MVPP2_TXP_TOKEN_SIZE_MAX;
+ if (size < mtu) {
+ size = mtu;
+ val &= ~MVPP2_TXP_TOKEN_SIZE_MAX;
+ val |= size;
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_TOKEN_SIZE_REG, val);
+ }
+
+ for (txq = 0; txq < txq_number; txq++) {
+ val = mvpp2_read(port->priv,
+ MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(txq));
+ size = val & MVPP2_TXQ_TOKEN_SIZE_MAX;
+
+ if (size < mtu) {
+ size = mtu;
+ val &= ~MVPP2_TXQ_TOKEN_SIZE_MAX;
+ val |= size;
+ mvpp2_write(port->priv,
+ MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(txq),
+ val);
+ }
+ }
+}
+
+/* Set the number of packets that will be received before Rx interrupt
+ * will be generated by HW.
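+ * A larger value means fewer interrupts at the cost of higher
+ * completion latency.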
+ */ +static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port, + struct mvpp2_rx_queue *rxq, u32 pkts) +{ + u32 val; + + val = (pkts & MVPP2_OCCUPIED_THRESH_MASK); + mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id); + mvpp2_write(port->priv, MVPP2_RXQ_THRESH_REG, val); + + rxq->pkts_coal = pkts; +} + +/* Set the time delay in usec before Rx interrupt */ +static void mvpp2_rx_time_coal_set(struct mvpp2_port *port, + struct mvpp2_rx_queue *rxq, u32 usec) +{ + u32 val; + + val = (port->priv->tclk / USEC_PER_SEC) * usec; + mvpp2_write(port->priv, MVPP2_ISR_RX_THRESHOLD_REG(rxq->id), val); + + rxq->time_coal = usec; +} + +/* Set threshold for TX_DONE pkts coalescing */ +static void mvpp2_tx_done_pkts_coal_set(void *arg) +{ + struct mvpp2_port *port = arg; + int queue; + u32 val; + + for (queue = 0; queue < txq_number; queue++) { + struct mvpp2_tx_queue *txq = port->txqs[queue]; + + val = (txq->done_pkts_coal << MVPP2_TRANSMITTED_THRESH_OFFSET) & + MVPP2_TRANSMITTED_THRESH_MASK; + mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id); + mvpp2_write(port->priv, MVPP2_TXQ_THRESH_REG, val); + } +} + +/* Free Tx queue skbuffs */ +static void mvpp2_txq_bufs_free(struct mvpp2_port *port, + struct mvpp2_tx_queue *txq, + struct mvpp2_txq_pcpu *txq_pcpu, int num) +{ + int i; + + for (i = 0; i < num; i++) { + struct mvpp2_tx_desc *tx_desc = txq->descs + + txq_pcpu->txq_get_index; + struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index]; + + mvpp2_txq_inc_get(txq_pcpu); + + if (!skb) + continue; + + dma_unmap_single(port->dev->dev.parent, tx_desc->buf_phys_addr, + tx_desc->data_size, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + } +} + +static inline struct mvpp2_rx_queue *mvpp2_get_rx_queue(struct mvpp2_port *port, + u32 cause) +{ + int queue = fls(cause) - 1; + + return port->rxqs[queue]; +} + +static inline struct mvpp2_tx_queue *mvpp2_get_tx_queue(struct mvpp2_port *port, + u32 cause) +{ + int queue = fls(cause >> 16) - 1; + + return port->txqs[queue]; +} + +/* Handle end of transmission */ +static void mvpp2_txq_done(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, + struct mvpp2_txq_pcpu *txq_pcpu) +{ + struct netdev_queue *nq = netdev_get_tx_queue(port->dev, txq->log_id); + int tx_done; + + if (txq_pcpu->cpu != smp_processor_id()) + netdev_err(port->dev, "wrong cpu on the end of Tx processing\n"); + + tx_done = mvpp2_txq_sent_desc_proc(port, txq); + if (!tx_done) + return; + mvpp2_txq_bufs_free(port, txq, txq_pcpu, tx_done); + + txq_pcpu->count -= tx_done; + + if (netif_tx_queue_stopped(nq)) + if (txq_pcpu->size - txq_pcpu->count >= MAX_SKB_FRAGS + 1) + netif_tx_wake_queue(nq); +} + +/* Rx/Tx queue initialization/cleanup methods */ + +/* Allocate and initialize descriptors for aggr TXQ */ +static int mvpp2_aggr_txq_init(struct platform_device *pdev, + struct mvpp2_tx_queue *aggr_txq, + int desc_num, int cpu, + struct mvpp2 *priv) +{ + /* Allocate memory for TX descriptors */ + aggr_txq->descs = dma_alloc_coherent(&pdev->dev, + desc_num * MVPP2_DESC_ALIGNED_SIZE, + &aggr_txq->descs_phys, GFP_KERNEL); + if (!aggr_txq->descs) + return -ENOMEM; + + /* Make sure descriptor address is cache line size aligned */ + BUG_ON(aggr_txq->descs != + PTR_ALIGN(aggr_txq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE)); + + aggr_txq->last_desc = aggr_txq->size - 1; + + /* Aggr TXQ no reset WA */ + aggr_txq->next_desc_to_proc = mvpp2_read(priv, + MVPP2_AGGR_TXQ_INDEX_REG(cpu)); + + /* Set Tx descriptors queue starting address */ + /* indirect access */ + mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), + 
aggr_txq->descs_phys); + mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), desc_num); + + return 0; +} + +/* Create a specified Rx queue */ +static int mvpp2_rxq_init(struct mvpp2_port *port, + struct mvpp2_rx_queue *rxq) + +{ + rxq->size = port->rx_ring_size; + + /* Allocate memory for RX descriptors */ + rxq->descs = dma_alloc_coherent(port->dev->dev.parent, + rxq->size * MVPP2_DESC_ALIGNED_SIZE, + &rxq->descs_phys, GFP_KERNEL); + if (!rxq->descs) + return -ENOMEM; + + BUG_ON(rxq->descs != + PTR_ALIGN(rxq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE)); + + rxq->last_desc = rxq->size - 1; + + /* Zero occupied and non-occupied counters - direct access */ + mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0); + + /* Set Rx descriptors queue starting address - indirect access */ + mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id); + mvpp2_write(port->priv, MVPP2_RXQ_DESC_ADDR_REG, rxq->descs_phys); + mvpp2_write(port->priv, MVPP2_RXQ_DESC_SIZE_REG, rxq->size); + mvpp2_write(port->priv, MVPP2_RXQ_INDEX_REG, 0); + + /* Set Offset */ + mvpp2_rxq_offset_set(port, rxq->id, NET_SKB_PAD); + + /* Set coalescing pkts and time */ + mvpp2_rx_pkts_coal_set(port, rxq, rxq->pkts_coal); + mvpp2_rx_time_coal_set(port, rxq, rxq->time_coal); + + /* Add number of descriptors ready for receiving packets */ + mvpp2_rxq_status_update(port, rxq->id, 0, rxq->size); + + return 0; +} + +/* Push packets received by the RXQ to BM pool */ +static void mvpp2_rxq_drop_pkts(struct mvpp2_port *port, + struct mvpp2_rx_queue *rxq) +{ + int rx_received, i; + + rx_received = mvpp2_rxq_received(port, rxq->id); + if (!rx_received) + return; + + for (i = 0; i < rx_received; i++) { + struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq); + u32 bm = mvpp2_bm_cookie_build(rx_desc); + + mvpp2_pool_refill(port, bm, rx_desc->buf_phys_addr, + rx_desc->buf_cookie); + } + mvpp2_rxq_status_update(port, rxq->id, rx_received, rx_received); +} + +/* Cleanup Rx queue */ +static void mvpp2_rxq_deinit(struct mvpp2_port *port, + struct mvpp2_rx_queue *rxq) +{ + mvpp2_rxq_drop_pkts(port, rxq); + + if (rxq->descs) + dma_free_coherent(port->dev->dev.parent, + rxq->size * MVPP2_DESC_ALIGNED_SIZE, + rxq->descs, + rxq->descs_phys); + + rxq->descs = NULL; + rxq->last_desc = 0; + rxq->next_desc_to_proc = 0; + rxq->descs_phys = 0; + + /* Clear Rx descriptors queue starting address and size; + * free descriptor number + */ + mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0); + mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id); + mvpp2_write(port->priv, MVPP2_RXQ_DESC_ADDR_REG, 0); + mvpp2_write(port->priv, MVPP2_RXQ_DESC_SIZE_REG, 0); +} + +/* Create and initialize a Tx queue */ +static int mvpp2_txq_init(struct mvpp2_port *port, + struct mvpp2_tx_queue *txq) +{ + u32 val; + int cpu, desc, desc_per_txq, tx_port_num; + struct mvpp2_txq_pcpu *txq_pcpu; + + txq->size = port->tx_ring_size; + + /* Allocate memory for Tx descriptors */ + txq->descs = dma_alloc_coherent(port->dev->dev.parent, + txq->size * MVPP2_DESC_ALIGNED_SIZE, + &txq->descs_phys, GFP_KERNEL); + if (!txq->descs) + return -ENOMEM; + + /* Make sure descriptor address is cache line size aligned */ + BUG_ON(txq->descs != + PTR_ALIGN(txq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE)); + + txq->last_desc = txq->size - 1; + + /* Set Tx descriptors queue starting address - indirect access */ + mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id); + mvpp2_write(port->priv, MVPP2_TXQ_DESC_ADDR_REG, txq->descs_phys); + mvpp2_write(port->priv, MVPP2_TXQ_DESC_SIZE_REG, txq->size & + 
MVPP2_TXQ_DESC_SIZE_MASK);
+ mvpp2_write(port->priv, MVPP2_TXQ_INDEX_REG, 0);
+ mvpp2_write(port->priv, MVPP2_TXQ_RSVD_CLR_REG,
+ txq->id << MVPP2_TXQ_RSVD_CLR_OFFSET);
+ val = mvpp2_read(port->priv, MVPP2_TXQ_PENDING_REG);
+ val &= ~MVPP2_TXQ_PENDING_MASK;
+ mvpp2_write(port->priv, MVPP2_TXQ_PENDING_REG, val);
+
+ /* Calculate base address in prefetch buffer. We reserve 16 descriptors
+ * for each existing TXQ.
+ * TCONTS for PON port must be continuous from 0 to MVPP2_MAX_TCONT
+ * GBE ports are assumed to be continuous from 0 to MVPP2_MAX_PORTS
+ */
+ desc_per_txq = 16;
+ desc = (port->id * MVPP2_MAX_TXQ * desc_per_txq) +
+ (txq->log_id * desc_per_txq);
+
+ mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG,
+ MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
+ MVPP2_PREF_BUF_THRESH(desc_per_txq/2));
+
+ /* WRR / EJP configuration - indirect access */
+ tx_port_num = mvpp2_egress_port(port);
+ mvpp2_write(port->priv, MVPP2_TXP_SCHED_PORT_INDEX_REG, tx_port_num);
+
+ val = mvpp2_read(port->priv, MVPP2_TXQ_SCHED_REFILL_REG(txq->log_id));
+ val &= ~MVPP2_TXQ_REFILL_PERIOD_ALL_MASK;
+ val |= MVPP2_TXQ_REFILL_PERIOD_MASK(1);
+ val |= MVPP2_TXQ_REFILL_TOKENS_ALL_MASK;
+ mvpp2_write(port->priv, MVPP2_TXQ_SCHED_REFILL_REG(txq->log_id), val);
+
+ val = MVPP2_TXQ_TOKEN_SIZE_MAX;
+ mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(txq->log_id),
+ val);
+
+ for_each_present_cpu(cpu) {
+ txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+ txq_pcpu->size = txq->size;
+ txq_pcpu->tx_skb = kmalloc(txq_pcpu->size *
+ sizeof(*txq_pcpu->tx_skb),
+ GFP_KERNEL);
+ if (!txq_pcpu->tx_skb) {
+ dma_free_coherent(port->dev->dev.parent,
+ txq->size * MVPP2_DESC_ALIGNED_SIZE,
+ txq->descs, txq->descs_phys);
+ return -ENOMEM;
+ }
+
+ txq_pcpu->count = 0;
+ txq_pcpu->reserved_num = 0;
+ txq_pcpu->txq_put_index = 0;
+ txq_pcpu->txq_get_index = 0;
+ }
+
+ return 0;
+}
+
+/* Free allocated TXQ resources */
+static void mvpp2_txq_deinit(struct mvpp2_port *port,
+ struct mvpp2_tx_queue *txq)
+{
+ struct mvpp2_txq_pcpu *txq_pcpu;
+ int cpu;
+
+ for_each_present_cpu(cpu) {
+ txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+ kfree(txq_pcpu->tx_skb);
+ }
+
+ if (txq->descs)
+ dma_free_coherent(port->dev->dev.parent,
+ txq->size * MVPP2_DESC_ALIGNED_SIZE,
+ txq->descs, txq->descs_phys);
+
+ txq->descs = NULL;
+ txq->last_desc = 0;
+ txq->next_desc_to_proc = 0;
+ txq->descs_phys = 0;
+
+ /* Set minimum bandwidth for disabled TXQs */
+ mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0);
+
+ /* Set Tx descriptors queue starting address and size */
+ mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
+ mvpp2_write(port->priv, MVPP2_TXQ_DESC_ADDR_REG, 0);
+ mvpp2_write(port->priv, MVPP2_TXQ_DESC_SIZE_REG, 0);
+}
+
+/* Cleanup Tx ports */
+static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
+{
+ struct mvpp2_txq_pcpu *txq_pcpu;
+ int delay, pending, cpu;
+ u32 val;
+
+ mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
+ val = mvpp2_read(port->priv, MVPP2_TXQ_PREF_BUF_REG);
+ val |= MVPP2_TXQ_DRAIN_EN_MASK;
+ mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG, val);
+
+ /* The napi queue has been stopped so wait for all packets
+ * to be transmitted.
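+ * The wait is bounded by MVPP2_TX_PENDING_TIMEOUT_MSEC, so a stuck
+ * queue cannot block the cleanup path forever.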
+ */ + delay = 0; + do { + if (delay >= MVPP2_TX_PENDING_TIMEOUT_MSEC) { + netdev_warn(port->dev, + "port %d: cleaning queue %d timed out\n", + port->id, txq->log_id); + break; + } + mdelay(1); + delay++; + + pending = mvpp2_txq_pend_desc_num_get(port, txq); + } while (pending); + + val &= ~MVPP2_TXQ_DRAIN_EN_MASK; + mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG, val); + + for_each_present_cpu(cpu) { + txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); + + /* Release all packets */ + mvpp2_txq_bufs_free(port, txq, txq_pcpu, txq_pcpu->count); + + /* Reset queue */ + txq_pcpu->count = 0; + txq_pcpu->txq_put_index = 0; + txq_pcpu->txq_get_index = 0; + } +} + +/* Cleanup all Tx queues */ +static void mvpp2_cleanup_txqs(struct mvpp2_port *port) +{ + struct mvpp2_tx_queue *txq; + int queue; + u32 val; + + val = mvpp2_read(port->priv, MVPP2_TX_PORT_FLUSH_REG); + + /* Reset Tx ports and delete Tx queues */ + val |= MVPP2_TX_PORT_FLUSH_MASK(port->id); + mvpp2_write(port->priv, MVPP2_TX_PORT_FLUSH_REG, val); + + for (queue = 0; queue < txq_number; queue++) { + txq = port->txqs[queue]; + mvpp2_txq_clean(port, txq); + mvpp2_txq_deinit(port, txq); + } + + on_each_cpu(mvpp2_txq_sent_counter_clear, port, 1); + + val &= ~MVPP2_TX_PORT_FLUSH_MASK(port->id); + mvpp2_write(port->priv, MVPP2_TX_PORT_FLUSH_REG, val); +} + +/* Cleanup all Rx queues */ +static void mvpp2_cleanup_rxqs(struct mvpp2_port *port) +{ + int queue; + + for (queue = 0; queue < rxq_number; queue++) + mvpp2_rxq_deinit(port, port->rxqs[queue]); +} + +/* Init all Rx queues for port */ +static int mvpp2_setup_rxqs(struct mvpp2_port *port) +{ + int queue, err; + + for (queue = 0; queue < rxq_number; queue++) { + err = mvpp2_rxq_init(port, port->rxqs[queue]); + if (err) + goto err_cleanup; + } + return 0; + +err_cleanup: + mvpp2_cleanup_rxqs(port); + return err; +} + +/* Init all tx queues for port */ +static int mvpp2_setup_txqs(struct mvpp2_port *port) +{ + struct mvpp2_tx_queue *txq; + int queue, err; + + for (queue = 0; queue < txq_number; queue++) { + txq = port->txqs[queue]; + err = mvpp2_txq_init(port, txq); + if (err) + goto err_cleanup; + } + + on_each_cpu(mvpp2_tx_done_pkts_coal_set, port, 1); + on_each_cpu(mvpp2_txq_sent_counter_clear, port, 1); + return 0; + +err_cleanup: + mvpp2_cleanup_txqs(port); + return err; +} + +/* The callback for per-port interrupt */ +static irqreturn_t mvpp2_isr(int irq, void *dev_id) +{ + struct mvpp2_port *port = (struct mvpp2_port *)dev_id; + + mvpp2_interrupts_disable(port); + + napi_schedule(&port->napi); + + return IRQ_HANDLED; +} + +/* Adjust link */ +static void mvpp2_link_event(struct net_device *dev) +{ + struct mvpp2_port *port = netdev_priv(dev); + struct phy_device *phydev = port->phy_dev; + int status_change = 0; + u32 val; + + if (phydev->link) { + if ((port->speed != phydev->speed) || + (port->duplex != phydev->duplex)) { + u32 val; + + val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | + MVPP2_GMAC_CONFIG_GMII_SPEED | + MVPP2_GMAC_CONFIG_FULL_DUPLEX | + MVPP2_GMAC_AN_SPEED_EN | + MVPP2_GMAC_AN_DUPLEX_EN); + + if (phydev->duplex) + val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX; + + if (phydev->speed == SPEED_1000) + val |= MVPP2_GMAC_CONFIG_GMII_SPEED; + else + val |= MVPP2_GMAC_CONFIG_MII_SPEED; + + writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + + port->duplex = phydev->duplex; + port->speed = phydev->speed; + } + } + + if (phydev->link != port->link) { + if (!phydev->link) { + port->duplex = -1; + port->speed = 0; + } + + port->link = phydev->link; + 
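/* Remember the transition so the block below reprograms the MAC and logs it */
+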
status_change = 1; + } + + if (status_change) { + if (phydev->link) { + val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + val |= (MVPP2_GMAC_FORCE_LINK_PASS | + MVPP2_GMAC_FORCE_LINK_DOWN); + writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + mvpp2_egress_enable(port); + mvpp2_ingress_enable(port); + } else { + mvpp2_ingress_disable(port); + mvpp2_egress_disable(port); + } + phy_print_status(phydev); + } +} + +/* Main RX/TX processing routines */ + +/* Display more error info */ +static void mvpp2_rx_error(struct mvpp2_port *port, + struct mvpp2_rx_desc *rx_desc) +{ + u32 status = rx_desc->status; + + switch (status & MVPP2_RXD_ERR_CODE_MASK) { + case MVPP2_RXD_ERR_CRC: + netdev_err(port->dev, "bad rx status %08x (crc error), size=%d\n", + status, rx_desc->data_size); + break; + case MVPP2_RXD_ERR_OVERRUN: + netdev_err(port->dev, "bad rx status %08x (overrun error), size=%d\n", + status, rx_desc->data_size); + break; + case MVPP2_RXD_ERR_RESOURCE: + netdev_err(port->dev, "bad rx status %08x (resource error), size=%d\n", + status, rx_desc->data_size); + break; + } +} + +/* Handle RX checksum offload */ +static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status, + struct sk_buff *skb) +{ + if (((status & MVPP2_RXD_L3_IP4) && + !(status & MVPP2_RXD_IP4_HEADER_ERR)) || + (status & MVPP2_RXD_L3_IP6)) + if (((status & MVPP2_RXD_L4_UDP) || + (status & MVPP2_RXD_L4_TCP)) && + (status & MVPP2_RXD_L4_CSUM_OK)) { + skb->csum = 0; + skb->ip_summed = CHECKSUM_UNNECESSARY; + return; + } + + skb->ip_summed = CHECKSUM_NONE; +} + +/* Reuse skb if possible, or allocate a new skb and add it to BM pool */ +static int mvpp2_rx_refill(struct mvpp2_port *port, + struct mvpp2_bm_pool *bm_pool, + u32 bm, int is_recycle) +{ + struct sk_buff *skb; + dma_addr_t phys_addr; + + if (is_recycle && + (atomic_read(&bm_pool->in_use) < bm_pool->in_use_thresh)) + return 0; + + /* No recycle or too many buffers are in use, so allocate a new skb */ + skb = mvpp2_skb_alloc(port, bm_pool, &phys_addr, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + mvpp2_pool_refill(port, bm, (u32)phys_addr, (u32)skb); + atomic_dec(&bm_pool->in_use); + return 0; +} + +/* Handle tx checksum */ +static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + int ip_hdr_len = 0; + u8 l4_proto; + + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *ip4h = ip_hdr(skb); + + /* Calculate IPv4 checksum and L4 checksum */ + ip_hdr_len = ip4h->ihl; + l4_proto = ip4h->protocol; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + + /* Read l4_protocol from one of IPv6 extra headers */ + if (skb_network_header_len(skb) > 0) + ip_hdr_len = (skb_network_header_len(skb) >> 2); + l4_proto = ip6h->nexthdr; + } else { + return MVPP2_TXD_L4_CSUM_NOT; + } + + return mvpp2_txq_desc_csum(skb_network_offset(skb), + skb->protocol, ip_hdr_len, l4_proto); + } + + return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE; +} + +static void mvpp2_buff_hdr_rx(struct mvpp2_port *port, + struct mvpp2_rx_desc *rx_desc) +{ + struct mvpp2_buff_hdr *buff_hdr; + struct sk_buff *skb; + u32 rx_status = rx_desc->status; + u32 buff_phys_addr; + u32 buff_virt_addr; + u32 buff_phys_addr_next; + u32 buff_virt_addr_next; + int mc_id; + int pool_id; + + pool_id = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >> + MVPP2_RXD_BM_POOL_ID_OFFS; + buff_phys_addr = rx_desc->buf_phys_addr; + buff_virt_addr = rx_desc->buf_cookie; + + do { + skb = (struct sk_buff *)buff_virt_addr; + buff_hdr = 
(struct mvpp2_buff_hdr *)skb->head; + + mc_id = MVPP2_B_HDR_INFO_MC_ID(buff_hdr->info); + + buff_phys_addr_next = buff_hdr->next_buff_phys_addr; + buff_virt_addr_next = buff_hdr->next_buff_virt_addr; + + /* Release buffer */ + mvpp2_bm_pool_mc_put(port, pool_id, buff_phys_addr, + buff_virt_addr, mc_id); + + buff_phys_addr = buff_phys_addr_next; + buff_virt_addr = buff_virt_addr_next; + + } while (!MVPP2_B_HDR_INFO_IS_LAST(buff_hdr->info)); +} + +/* Main rx processing */ +static int mvpp2_rx(struct mvpp2_port *port, int rx_todo, + struct mvpp2_rx_queue *rxq) +{ + struct net_device *dev = port->dev; + int rx_received, rx_filled, i; + u32 rcvd_pkts = 0; + u32 rcvd_bytes = 0; + + /* Get number of received packets and clamp the to-do */ + rx_received = mvpp2_rxq_received(port, rxq->id); + if (rx_todo > rx_received) + rx_todo = rx_received; + + rx_filled = 0; + for (i = 0; i < rx_todo; i++) { + struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq); + struct mvpp2_bm_pool *bm_pool; + struct sk_buff *skb; + u32 bm, rx_status; + int pool, rx_bytes, err; + + rx_filled++; + rx_status = rx_desc->status; + rx_bytes = rx_desc->data_size - MVPP2_MH_SIZE; + + bm = mvpp2_bm_cookie_build(rx_desc); + pool = mvpp2_bm_cookie_pool_get(bm); + bm_pool = &port->priv->bm_pools[pool]; + /* Check if buffer header is used */ + if (rx_status & MVPP2_RXD_BUF_HDR) { + mvpp2_buff_hdr_rx(port, rx_desc); + continue; + } + + /* In case of an error, release the requested buffer pointer + * to the Buffer Manager. This request process is controlled + * by the hardware, and the information about the buffer is + * comprised by the RX descriptor. + */ + if (rx_status & MVPP2_RXD_ERR_SUMMARY) { + dev->stats.rx_errors++; + mvpp2_rx_error(port, rx_desc); + mvpp2_pool_refill(port, bm, rx_desc->buf_phys_addr, + rx_desc->buf_cookie); + continue; + } + + skb = (struct sk_buff *)rx_desc->buf_cookie; + + rcvd_pkts++; + rcvd_bytes += rx_bytes; + atomic_inc(&bm_pool->in_use); + + skb_reserve(skb, MVPP2_MH_SIZE); + skb_put(skb, rx_bytes); + skb->protocol = eth_type_trans(skb, dev); + mvpp2_rx_csum(port, rx_status, skb); + + napi_gro_receive(&port->napi, skb); + + err = mvpp2_rx_refill(port, bm_pool, bm, 0); + if (err) { + netdev_err(port->dev, "failed to refill BM pools\n"); + rx_filled--; + } + } + + if (rcvd_pkts) { + struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats); + + u64_stats_update_begin(&stats->syncp); + stats->rx_packets += rcvd_pkts; + stats->rx_bytes += rcvd_bytes; + u64_stats_update_end(&stats->syncp); + } + + /* Update Rx queue management counters */ + wmb(); + mvpp2_rxq_status_update(port, rxq->id, rx_todo, rx_filled); + + return rx_todo; +} + +static inline void +tx_desc_unmap_put(struct device *dev, struct mvpp2_tx_queue *txq, + struct mvpp2_tx_desc *desc) +{ + dma_unmap_single(dev, desc->buf_phys_addr, + desc->data_size, DMA_TO_DEVICE); + mvpp2_txq_desc_put(txq); +} + +/* Handle tx fragmentation processing */ +static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb, + struct mvpp2_tx_queue *aggr_txq, + struct mvpp2_tx_queue *txq) +{ + struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu); + struct mvpp2_tx_desc *tx_desc; + int i; + dma_addr_t buf_phys_addr; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + void *addr = page_address(frag->page.p) + frag->page_offset; + + tx_desc = mvpp2_txq_next_desc_get(aggr_txq); + tx_desc->phys_txq = txq->id; + tx_desc->data_size = frag->size; + + buf_phys_addr = 
dma_map_single(port->dev->dev.parent, addr, + tx_desc->data_size, + DMA_TO_DEVICE); + if (dma_mapping_error(port->dev->dev.parent, buf_phys_addr)) { + mvpp2_txq_desc_put(txq); + goto error; + } + + tx_desc->packet_offset = buf_phys_addr & MVPP2_TX_DESC_ALIGN; + tx_desc->buf_phys_addr = buf_phys_addr & (~MVPP2_TX_DESC_ALIGN); + + if (i == (skb_shinfo(skb)->nr_frags - 1)) { + /* Last descriptor */ + tx_desc->command = MVPP2_TXD_L_DESC; + mvpp2_txq_inc_put(txq_pcpu, skb); + } else { + /* Descriptor in the middle: Not First, Not Last */ + tx_desc->command = 0; + mvpp2_txq_inc_put(txq_pcpu, NULL); + } + } + + return 0; + +error: + /* Release all descriptors that were used to map fragments of + * this packet, as well as the corresponding DMA mappings + */ + for (i = i - 1; i >= 0; i--) { + tx_desc = txq->descs + i; + tx_desc_unmap_put(port->dev->dev.parent, txq, tx_desc); + } + + return -ENOMEM; +} + +/* Main tx processing */ +static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_tx_queue *txq, *aggr_txq; + struct mvpp2_txq_pcpu *txq_pcpu; + struct mvpp2_tx_desc *tx_desc; + dma_addr_t buf_phys_addr; + int frags = 0; + u16 txq_id; + u32 tx_cmd; + + txq_id = skb_get_queue_mapping(skb); + txq = port->txqs[txq_id]; + txq_pcpu = this_cpu_ptr(txq->pcpu); + aggr_txq = &port->priv->aggr_txqs[smp_processor_id()]; + + frags = skb_shinfo(skb)->nr_frags + 1; + + /* Check number of available descriptors */ + if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq, frags) || + mvpp2_txq_reserved_desc_num_proc(port->priv, txq, + txq_pcpu, frags)) { + frags = 0; + goto out; + } + + /* Get a descriptor for the first part of the packet */ + tx_desc = mvpp2_txq_next_desc_get(aggr_txq); + tx_desc->phys_txq = txq->id; + tx_desc->data_size = skb_headlen(skb); + + buf_phys_addr = dma_map_single(dev->dev.parent, skb->data, + tx_desc->data_size, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev->dev.parent, buf_phys_addr))) { + mvpp2_txq_desc_put(txq); + frags = 0; + goto out; + } + tx_desc->packet_offset = buf_phys_addr & MVPP2_TX_DESC_ALIGN; + tx_desc->buf_phys_addr = buf_phys_addr & ~MVPP2_TX_DESC_ALIGN; + + tx_cmd = mvpp2_skb_tx_csum(port, skb); + + if (frags == 1) { + /* First and Last descriptor */ + tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_L_DESC; + tx_desc->command = tx_cmd; + mvpp2_txq_inc_put(txq_pcpu, skb); + } else { + /* First but not Last */ + tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_PADDING_DISABLE; + tx_desc->command = tx_cmd; + mvpp2_txq_inc_put(txq_pcpu, NULL); + + /* Continue with other skb fragments */ + if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) { + tx_desc_unmap_put(port->dev->dev.parent, txq, tx_desc); + frags = 0; + goto out; + } + } + + txq_pcpu->reserved_num -= frags; + txq_pcpu->count += frags; + aggr_txq->count += frags; + + /* Enable transmit */ + wmb(); + mvpp2_aggr_txq_pend_desc_add(port, frags); + + if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) { + struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id); + + netif_tx_stop_queue(nq); + } +out: + if (frags > 0) { + struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats); + + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + } else { + dev->stats.tx_dropped++; + dev_kfree_skb_any(skb); + } + + return NETDEV_TX_OK; +} + +static inline void mvpp2_cause_error(struct net_device *dev, int cause) +{ + if (cause & MVPP2_CAUSE_FCS_ERR_MASK) + netdev_err(dev, "FCS 
error\n"); + if (cause & MVPP2_CAUSE_RX_FIFO_OVERRUN_MASK) + netdev_err(dev, "rx fifo overrun error\n"); + if (cause & MVPP2_CAUSE_TX_FIFO_UNDERRUN_MASK) + netdev_err(dev, "tx fifo underrun error\n"); +} + +static void mvpp2_txq_done_percpu(void *arg) +{ + struct mvpp2_port *port = arg; + u32 cause_rx_tx, cause_tx, cause_misc; + + /* Rx/Tx cause register + * + * Bits 0-15: each bit indicates received packets on the Rx queue + * (bit 0 is for Rx queue 0). + * + * Bits 16-23: each bit indicates transmitted packets on the Tx queue + * (bit 16 is for Tx queue 0). + * + * Each CPU has its own Rx/Tx cause register + */ + cause_rx_tx = mvpp2_read(port->priv, + MVPP2_ISR_RX_TX_CAUSE_REG(port->id)); + cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; + cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK; + + if (cause_misc) { + mvpp2_cause_error(port->dev, cause_misc); + + /* Clear the cause register */ + mvpp2_write(port->priv, MVPP2_ISR_MISC_CAUSE_REG, 0); + mvpp2_write(port->priv, MVPP2_ISR_RX_TX_CAUSE_REG(port->id), + cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK); + } + + /* Release TX descriptors */ + if (cause_tx) { + struct mvpp2_tx_queue *txq = mvpp2_get_tx_queue(port, cause_tx); + struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu); + + if (txq_pcpu->count) + mvpp2_txq_done(port, txq, txq_pcpu); + } +} + +static int mvpp2_poll(struct napi_struct *napi, int budget) +{ + u32 cause_rx_tx, cause_rx; + int rx_done = 0; + struct mvpp2_port *port = netdev_priv(napi->dev); + + on_each_cpu(mvpp2_txq_done_percpu, port, 1); + + cause_rx_tx = mvpp2_read(port->priv, + MVPP2_ISR_RX_TX_CAUSE_REG(port->id)); + cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK; + + /* Process RX packets */ + cause_rx |= port->pending_cause_rx; + while (cause_rx && budget > 0) { + int count; + struct mvpp2_rx_queue *rxq; + + rxq = mvpp2_get_rx_queue(port, cause_rx); + if (!rxq) + break; + + count = mvpp2_rx(port, budget, rxq); + rx_done += count; + budget -= count; + if (budget > 0) { + /* Clear the bit associated to this Rx queue + * so that next iteration will continue from + * the next Rx queue. 
+ */ + cause_rx &= ~(1 << rxq->logic_rxq); + } + } + + if (budget > 0) { + cause_rx = 0; + napi_complete(napi); + + mvpp2_interrupts_enable(port); + } + port->pending_cause_rx = cause_rx; + return rx_done; +} + +/* Set hw internals when starting port */ +static void mvpp2_start_dev(struct mvpp2_port *port) +{ + mvpp2_gmac_max_rx_size_set(port); + mvpp2_txp_max_tx_size_set(port); + + napi_enable(&port->napi); + + /* Enable interrupts on all CPUs */ + mvpp2_interrupts_enable(port); + + mvpp2_port_enable(port); + phy_start(port->phy_dev); + netif_tx_start_all_queues(port->dev); +} + +/* Set hw internals when stopping port */ +static void mvpp2_stop_dev(struct mvpp2_port *port) +{ + /* Stop new packets from arriving to RXQs */ + mvpp2_ingress_disable(port); + + mdelay(10); + + /* Disable interrupts on all CPUs */ + mvpp2_interrupts_disable(port); + + napi_disable(&port->napi); + + netif_carrier_off(port->dev); + netif_tx_stop_all_queues(port->dev); + + mvpp2_egress_disable(port); + mvpp2_port_disable(port); + phy_stop(port->phy_dev); +} + +/* Return positive if MTU is valid */ +static inline int mvpp2_check_mtu_valid(struct net_device *dev, int mtu) +{ + if (mtu < 68) { + netdev_err(dev, "cannot change mtu to less than 68\n"); + return -EINVAL; + } + + /* 9676 == 9700 - 20 and rounding to 8 */ + if (mtu > 9676) { + netdev_info(dev, "illegal MTU value %d, round to 9676\n", mtu); + mtu = 9676; + } + + if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) { + netdev_info(dev, "illegal MTU value %d, round to %d\n", mtu, + ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8)); + mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8); + } + + return mtu; +} + +static int mvpp2_check_ringparam_valid(struct net_device *dev, + struct ethtool_ringparam *ring) +{ + u16 new_rx_pending = ring->rx_pending; + u16 new_tx_pending = ring->tx_pending; + + if (ring->rx_pending == 0 || ring->tx_pending == 0) + return -EINVAL; + + if (ring->rx_pending > MVPP2_MAX_RXD) + new_rx_pending = MVPP2_MAX_RXD; + else if (!IS_ALIGNED(ring->rx_pending, 16)) + new_rx_pending = ALIGN(ring->rx_pending, 16); + + if (ring->tx_pending > MVPP2_MAX_TXD) + new_tx_pending = MVPP2_MAX_TXD; + else if (!IS_ALIGNED(ring->tx_pending, 32)) + new_tx_pending = ALIGN(ring->tx_pending, 32); + + if (ring->rx_pending != new_rx_pending) { + netdev_info(dev, "illegal Rx ring size value %d, round to %d\n", + ring->rx_pending, new_rx_pending); + ring->rx_pending = new_rx_pending; + } + + if (ring->tx_pending != new_tx_pending) { + netdev_info(dev, "illegal Tx ring size value %d, round to %d\n", + ring->tx_pending, new_tx_pending); + ring->tx_pending = new_tx_pending; + } + + return 0; +} + +static void mvpp2_get_mac_address(struct mvpp2_port *port, unsigned char *addr) +{ + u32 mac_addr_l, mac_addr_m, mac_addr_h; + + mac_addr_l = readl(port->base + MVPP2_GMAC_CTRL_1_REG); + mac_addr_m = readl(port->priv->lms_base + MVPP2_SRC_ADDR_MIDDLE); + mac_addr_h = readl(port->priv->lms_base + MVPP2_SRC_ADDR_HIGH); + addr[0] = (mac_addr_h >> 24) & 0xFF; + addr[1] = (mac_addr_h >> 16) & 0xFF; + addr[2] = (mac_addr_h >> 8) & 0xFF; + addr[3] = mac_addr_h & 0xFF; + addr[4] = mac_addr_m & 0xFF; + addr[5] = (mac_addr_l >> MVPP2_GMAC_SA_LOW_OFFS) & 0xFF; +} + +static int mvpp2_phy_connect(struct mvpp2_port *port) +{ + struct phy_device *phy_dev; + + phy_dev = of_phy_connect(port->dev, port->phy_node, mvpp2_link_event, 0, + port->phy_interface); + if (!phy_dev) { + netdev_err(port->dev, "cannot connect to phy\n"); + return -ENODEV; + } + phy_dev->supported &= PHY_GBIT_FEATURES; + phy_dev->advertising = 
phy_dev->supported; + + port->phy_dev = phy_dev; + port->link = 0; + port->duplex = 0; + port->speed = 0; + + return 0; +} + +static void mvpp2_phy_disconnect(struct mvpp2_port *port) +{ + phy_disconnect(port->phy_dev); + port->phy_dev = NULL; +} + +static int mvpp2_open(struct net_device *dev) +{ + struct mvpp2_port *port = netdev_priv(dev); + unsigned char mac_bcast[ETH_ALEN] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + int err; + + err = mvpp2_prs_mac_da_accept(port->priv, port->id, mac_bcast, true); + if (err) { + netdev_err(dev, "mvpp2_prs_mac_da_accept BC failed\n"); + return err; + } + err = mvpp2_prs_mac_da_accept(port->priv, port->id, + dev->dev_addr, true); + if (err) { + netdev_err(dev, "mvpp2_prs_mac_da_accept MC failed\n"); + return err; + } + err = mvpp2_prs_tag_mode_set(port->priv, port->id, MVPP2_TAG_TYPE_MH); + if (err) { + netdev_err(dev, "mvpp2_prs_tag_mode_set failed\n"); + return err; + } + err = mvpp2_prs_def_flow(port); + if (err) { + netdev_err(dev, "mvpp2_prs_def_flow failed\n"); + return err; + } + + /* Allocate the Rx/Tx queues */ + err = mvpp2_setup_rxqs(port); + if (err) { + netdev_err(port->dev, "cannot allocate Rx queues\n"); + return err; + } + + err = mvpp2_setup_txqs(port); + if (err) { + netdev_err(port->dev, "cannot allocate Tx queues\n"); + goto err_cleanup_rxqs; + } + + err = request_irq(port->irq, mvpp2_isr, 0, dev->name, port); + if (err) { + netdev_err(port->dev, "cannot request IRQ %d\n", port->irq); + goto err_cleanup_txqs; + } + + /* In default link is down */ + netif_carrier_off(port->dev); + + err = mvpp2_phy_connect(port); + if (err < 0) + goto err_free_irq; + + /* Unmask interrupts on all CPUs */ + on_each_cpu(mvpp2_interrupts_unmask, port, 1); + + mvpp2_start_dev(port); + + return 0; + +err_free_irq: + free_irq(port->irq, port); +err_cleanup_txqs: + mvpp2_cleanup_txqs(port); +err_cleanup_rxqs: + mvpp2_cleanup_rxqs(port); + return err; +} + +static int mvpp2_stop(struct net_device *dev) +{ + struct mvpp2_port *port = netdev_priv(dev); + + mvpp2_stop_dev(port); + mvpp2_phy_disconnect(port); + + /* Mask interrupts on all CPUs */ + on_each_cpu(mvpp2_interrupts_mask, port, 1); + + free_irq(port->irq, port); + mvpp2_cleanup_rxqs(port); + mvpp2_cleanup_txqs(port); + + return 0; +} + +static void mvpp2_set_rx_mode(struct net_device *dev) +{ + struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2 *priv = port->priv; + struct netdev_hw_addr *ha; + int id = port->id; + bool allmulti = dev->flags & IFF_ALLMULTI; + + mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC); + mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti); + mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti); + + /* Remove all port->id's mcast enries */ + mvpp2_prs_mcast_del_all(priv, id); + + if (allmulti && !netdev_mc_empty(dev)) { + netdev_for_each_mc_addr(ha, dev) + mvpp2_prs_mac_da_accept(priv, id, ha->addr, true); + } +} + +static int mvpp2_set_mac_address(struct net_device *dev, void *p) +{ + struct mvpp2_port *port = netdev_priv(dev); + const struct sockaddr *addr = p; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) { + err = -EADDRNOTAVAIL; + goto error; + } + + if (!netif_running(dev)) { + err = mvpp2_prs_update_mac_da(dev, addr->sa_data); + if (!err) + return 0; + /* Reconfigure parser to accept the original MAC address */ + err = mvpp2_prs_update_mac_da(dev, dev->dev_addr); + if (err) + goto error; + } + + mvpp2_stop_dev(port); + + err = mvpp2_prs_update_mac_da(dev, addr->sa_data); + if (!err) + goto out_start; + + /* 
Reconfigure parser to accept the original MAC address */
+	err = mvpp2_prs_update_mac_da(dev, dev->dev_addr);
+	if (err)
+		goto error;
+out_start:
+	mvpp2_start_dev(port);
+	mvpp2_egress_enable(port);
+	mvpp2_ingress_enable(port);
+	return 0;
+
+error:
+	netdev_err(dev, "failed to change MAC address\n");
+	return err;
+}
+
+static int mvpp2_change_mtu(struct net_device *dev, int mtu)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int err;
+
+	mtu = mvpp2_check_mtu_valid(dev, mtu);
+	if (mtu < 0) {
+		err = mtu;
+		goto error;
+	}
+
+	if (!netif_running(dev)) {
+		err = mvpp2_bm_update_mtu(dev, mtu);
+		if (!err) {
+			port->pkt_size = MVPP2_RX_PKT_SIZE(mtu);
+			return 0;
+		}
+
+		/* Reconfigure BM to the original MTU */
+		err = mvpp2_bm_update_mtu(dev, dev->mtu);
+		if (err)
+			goto error;
+	}
+
+	mvpp2_stop_dev(port);
+
+	err = mvpp2_bm_update_mtu(dev, mtu);
+	if (!err) {
+		port->pkt_size = MVPP2_RX_PKT_SIZE(mtu);
+		goto out_start;
+	}
+
+	/* Reconfigure BM to the original MTU */
+	err = mvpp2_bm_update_mtu(dev, dev->mtu);
+	if (err)
+		goto error;
+
+out_start:
+	mvpp2_start_dev(port);
+	mvpp2_egress_enable(port);
+	mvpp2_ingress_enable(port);
+
+	return 0;
+
+error:
+	netdev_err(dev, "failed to change MTU\n");
+	return err;
+}
+
+static struct rtnl_link_stats64 *
+mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	unsigned int start;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct mvpp2_pcpu_stats *cpu_stats;
+		u64 rx_packets;
+		u64 rx_bytes;
+		u64 tx_packets;
+		u64 tx_bytes;
+
+		cpu_stats = per_cpu_ptr(port->stats, cpu);
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			rx_packets = cpu_stats->rx_packets;
+			rx_bytes = cpu_stats->rx_bytes;
+			tx_packets = cpu_stats->tx_packets;
+			tx_bytes = cpu_stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes += rx_bytes;
+		stats->tx_packets += tx_packets;
+		stats->tx_bytes += tx_bytes;
+	}
+
+	stats->rx_errors = dev->stats.rx_errors;
+	stats->rx_dropped = dev->stats.rx_dropped;
+	stats->tx_dropped = dev->stats.tx_dropped;
+
+	return stats;
+}
+
+/* Ethtool methods */
+
+/* Get settings (phy address, speed) for ethtool */
+static int mvpp2_ethtool_get_settings(struct net_device *dev,
+				      struct ethtool_cmd *cmd)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phy_dev)
+		return -ENODEV;
+	return phy_ethtool_gset(port->phy_dev, cmd);
+}
+
+/* Set settings (phy address, speed) for ethtool */
+static int mvpp2_ethtool_set_settings(struct net_device *dev,
+				      struct ethtool_cmd *cmd)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phy_dev)
+		return -ENODEV;
+	return phy_ethtool_sset(port->phy_dev, cmd);
+}
+
+/* Set interrupt coalescing for ethtool */
+static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
+				      struct ethtool_coalesce *c)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int queue;
+
+	for (queue = 0; queue < rxq_number; queue++) {
+		struct mvpp2_rx_queue *rxq = port->rxqs[queue];
+
+		rxq->time_coal = c->rx_coalesce_usecs;
+		rxq->pkts_coal = c->rx_max_coalesced_frames;
+		mvpp2_rx_pkts_coal_set(port, rxq, rxq->pkts_coal);
+		mvpp2_rx_time_coal_set(port, rxq, rxq->time_coal);
+	}
+
+	for (queue = 0; queue < txq_number; queue++) {
+		struct mvpp2_tx_queue *txq = port->txqs[queue];
+
+		txq->done_pkts_coal = c->tx_max_coalesced_frames;
+	}
+
+	on_each_cpu(mvpp2_tx_done_pkts_coal_set, port, 1);
+	return 0;
+}
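For context on the coalescing callbacks, these knobs are driven from userspace through the standard SIOCETHTOOL ioctl. A minimal userspace sketch, assuming an interface named eth0 and example values (illustrative only, not part of the patch); the rx_* fields map onto rxq->time_coal and rxq->pkts_coal above, and tx_max_coalesced_frames onto txq->done_pkts_coal:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_coalesce ec = { .cmd = ETHTOOL_GCOALESCE };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* example ifname */
	ifr.ifr_data = (void *)&ec;

	/* read the current settings, then modify and write them back */
	if (fd < 0 || ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_GCOALESCE");
		return 1;
	}

	ec.cmd = ETHTOOL_SCOALESCE;
	ec.rx_coalesce_usecs = 64;		/* -> rxq->time_coal */
	ec.rx_max_coalesced_frames = 32;	/* -> rxq->pkts_coal */
	ec.tx_max_coalesced_frames = 16;	/* -> txq->done_pkts_coal */
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_SCOALESCE");
		return 1;
	}

	close(fd);
	return 0;
}

+/* Get interrupt coalescing for ethtool */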
+static int mvpp2_ethtool_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *c) +{ + struct mvpp2_port *port = netdev_priv(dev); + + c->rx_coalesce_usecs = port->rxqs[0]->time_coal; + c->rx_max_coalesced_frames = port->rxqs[0]->pkts_coal; + c->tx_max_coalesced_frames = port->txqs[0]->done_pkts_coal; + return 0; +} + +static void mvpp2_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, MVPP2_DRIVER_NAME, + sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, MVPP2_DRIVER_VERSION, + sizeof(drvinfo->version)); + strlcpy(drvinfo->bus_info, dev_name(&dev->dev), + sizeof(drvinfo->bus_info)); +} + +static void mvpp2_ethtool_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *ring) +{ + struct mvpp2_port *port = netdev_priv(dev); + + ring->rx_max_pending = MVPP2_MAX_RXD; + ring->tx_max_pending = MVPP2_MAX_TXD; + ring->rx_pending = port->rx_ring_size; + ring->tx_pending = port->tx_ring_size; +} + +static int mvpp2_ethtool_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *ring) +{ + struct mvpp2_port *port = netdev_priv(dev); + u16 prev_rx_ring_size = port->rx_ring_size; + u16 prev_tx_ring_size = port->tx_ring_size; + int err; + + err = mvpp2_check_ringparam_valid(dev, ring); + if (err) + return err; + + if (!netif_running(dev)) { + port->rx_ring_size = ring->rx_pending; + port->tx_ring_size = ring->tx_pending; + return 0; + } + + /* The interface is running, so we have to force a + * reallocation of the queues + */ + mvpp2_stop_dev(port); + mvpp2_cleanup_rxqs(port); + mvpp2_cleanup_txqs(port); + + port->rx_ring_size = ring->rx_pending; + port->tx_ring_size = ring->tx_pending; + + err = mvpp2_setup_rxqs(port); + if (err) { + /* Reallocate Rx queues with the original ring size */ + port->rx_ring_size = prev_rx_ring_size; + ring->rx_pending = prev_rx_ring_size; + err = mvpp2_setup_rxqs(port); + if (err) + goto err_out; + } + err = mvpp2_setup_txqs(port); + if (err) { + /* Reallocate Tx queues with the original ring size */ + port->tx_ring_size = prev_tx_ring_size; + ring->tx_pending = prev_tx_ring_size; + err = mvpp2_setup_txqs(port); + if (err) + goto err_clean_rxqs; + } + + mvpp2_start_dev(port); + mvpp2_egress_enable(port); + mvpp2_ingress_enable(port); + + return 0; + +err_clean_rxqs: + mvpp2_cleanup_rxqs(port); +err_out: + netdev_err(dev, "fail to change ring parameters"); + return err; +} + +/* Device ops */ + +static const struct net_device_ops mvpp2_netdev_ops = { + .ndo_open = mvpp2_open, + .ndo_stop = mvpp2_stop, + .ndo_start_xmit = mvpp2_tx, + .ndo_set_rx_mode = mvpp2_set_rx_mode, + .ndo_set_mac_address = mvpp2_set_mac_address, + .ndo_change_mtu = mvpp2_change_mtu, + .ndo_get_stats64 = mvpp2_get_stats64, +}; + +static const struct ethtool_ops mvpp2_eth_tool_ops = { + .get_link = ethtool_op_get_link, + .get_settings = mvpp2_ethtool_get_settings, + .set_settings = mvpp2_ethtool_set_settings, + .set_coalesce = mvpp2_ethtool_set_coalesce, + .get_coalesce = mvpp2_ethtool_get_coalesce, + .get_drvinfo = mvpp2_ethtool_get_drvinfo, + .get_ringparam = mvpp2_ethtool_get_ringparam, + .set_ringparam = mvpp2_ethtool_set_ringparam, +}; + +/* Driver initialization */ + +static void mvpp2_port_power_up(struct mvpp2_port *port) +{ + mvpp2_port_mii_set(port); + mvpp2_port_periodic_xon_disable(port); + mvpp2_port_reset(port); +} + +/* Initialize port HW */ +static int mvpp2_port_init(struct mvpp2_port *port) +{ + struct device *dev = port->dev->dev.parent; + struct mvpp2 *priv = port->priv; + struct 
mvpp2_txq_pcpu *txq_pcpu; + int queue, cpu, err; + + if (port->first_rxq + rxq_number > MVPP2_RXQ_TOTAL_NUM) + return -EINVAL; + + /* Disable port */ + mvpp2_egress_disable(port); + mvpp2_port_disable(port); + + port->txqs = devm_kcalloc(dev, txq_number, sizeof(*port->txqs), + GFP_KERNEL); + if (!port->txqs) + return -ENOMEM; + + /* Associate physical Tx queues to this port and initialize. + * The mapping is predefined. + */ + for (queue = 0; queue < txq_number; queue++) { + int queue_phy_id = mvpp2_txq_phys(port->id, queue); + struct mvpp2_tx_queue *txq; + + txq = devm_kzalloc(dev, sizeof(*txq), GFP_KERNEL); + if (!txq) + return -ENOMEM; + + txq->pcpu = alloc_percpu(struct mvpp2_txq_pcpu); + if (!txq->pcpu) { + err = -ENOMEM; + goto err_free_percpu; + } + + txq->id = queue_phy_id; + txq->log_id = queue; + txq->done_pkts_coal = MVPP2_TXDONE_COAL_PKTS_THRESH; + for_each_present_cpu(cpu) { + txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); + txq_pcpu->cpu = cpu; + } + + port->txqs[queue] = txq; + } + + port->rxqs = devm_kcalloc(dev, rxq_number, sizeof(*port->rxqs), + GFP_KERNEL); + if (!port->rxqs) { + err = -ENOMEM; + goto err_free_percpu; + } + + /* Allocate and initialize Rx queue for this port */ + for (queue = 0; queue < rxq_number; queue++) { + struct mvpp2_rx_queue *rxq; + + /* Map physical Rx queue to port's logical Rx queue */ + rxq = devm_kzalloc(dev, sizeof(*rxq), GFP_KERNEL); + if (!rxq) + goto err_free_percpu; + /* Map this Rx queue to a physical queue */ + rxq->id = port->first_rxq + queue; + rxq->port = port->id; + rxq->logic_rxq = queue; + + port->rxqs[queue] = rxq; + } + + /* Configure Rx queue group interrupt for this port */ + mvpp2_write(priv, MVPP2_ISR_RXQ_GROUP_REG(port->id), rxq_number); + + /* Create Rx descriptor rings */ + for (queue = 0; queue < rxq_number; queue++) { + struct mvpp2_rx_queue *rxq = port->rxqs[queue]; + + rxq->size = port->rx_ring_size; + rxq->pkts_coal = MVPP2_RX_COAL_PKTS; + rxq->time_coal = MVPP2_RX_COAL_USEC; + } + + mvpp2_ingress_disable(port); + + /* Port default configuration */ + mvpp2_defaults_set(port); + + /* Port's classifier configuration */ + mvpp2_cls_oversize_rxq_set(port); + mvpp2_cls_port_config(port); + + /* Provide an initial Rx packet size */ + port->pkt_size = MVPP2_RX_PKT_SIZE(port->dev->mtu); + + /* Initialize pools for swf */ + err = mvpp2_swf_bm_pool_init(port); + if (err) + goto err_free_percpu; + + return 0; + +err_free_percpu: + for (queue = 0; queue < txq_number; queue++) { + if (!port->txqs[queue]) + continue; + free_percpu(port->txqs[queue]->pcpu); + } + return err; +} + +/* Ports initialization */ +static int mvpp2_port_probe(struct platform_device *pdev, + struct device_node *port_node, + struct mvpp2 *priv, + int *next_first_rxq) +{ + struct device_node *phy_node; + struct mvpp2_port *port; + struct net_device *dev; + struct resource *res; + const char *dt_mac_addr; + const char *mac_from; + char hw_mac_addr[ETH_ALEN]; + u32 id; + int features; + int phy_mode; + int priv_common_regs_num = 2; + int err, i; + + dev = alloc_etherdev_mqs(sizeof(struct mvpp2_port), txq_number, + rxq_number); + if (!dev) + return -ENOMEM; + + phy_node = of_parse_phandle(port_node, "phy", 0); + if (!phy_node) { + dev_err(&pdev->dev, "missing phy\n"); + err = -ENODEV; + goto err_free_netdev; + } + + phy_mode = of_get_phy_mode(port_node); + if (phy_mode < 0) { + dev_err(&pdev->dev, "incorrect phy mode\n"); + err = phy_mode; + goto err_free_netdev; + } + + if (of_property_read_u32(port_node, "port-id", &id)) { + err = -EINVAL; + dev_err(&pdev->dev, 
"missing port-id value\n"); + goto err_free_netdev; + } + + dev->tx_queue_len = MVPP2_MAX_TXD; + dev->watchdog_timeo = 5 * HZ; + dev->netdev_ops = &mvpp2_netdev_ops; + dev->ethtool_ops = &mvpp2_eth_tool_ops; + + port = netdev_priv(dev); + + port->irq = irq_of_parse_and_map(port_node, 0); + if (port->irq <= 0) { + err = -EINVAL; + goto err_free_netdev; + } + + if (of_property_read_bool(port_node, "marvell,loopback")) + port->flags |= MVPP2_F_LOOPBACK; + + port->priv = priv; + port->id = id; + port->first_rxq = *next_first_rxq; + port->phy_node = phy_node; + port->phy_interface = phy_mode; + + res = platform_get_resource(pdev, IORESOURCE_MEM, + priv_common_regs_num + id); + port->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(port->base)) { + err = PTR_ERR(port->base); + dev_err(&pdev->dev, "cannot obtain port base address\n"); + goto err_free_irq; + } + + /* Alloc per-cpu stats */ + port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats); + if (!port->stats) { + err = -ENOMEM; + goto err_free_irq; + } + + dt_mac_addr = of_get_mac_address(port_node); + if (dt_mac_addr && is_valid_ether_addr(dt_mac_addr)) { + mac_from = "device tree"; + ether_addr_copy(dev->dev_addr, dt_mac_addr); + } else { + mvpp2_get_mac_address(port, hw_mac_addr); + if (is_valid_ether_addr(hw_mac_addr)) { + mac_from = "hardware"; + ether_addr_copy(dev->dev_addr, hw_mac_addr); + } else { + mac_from = "random"; + eth_hw_addr_random(dev); + } + } + + port->tx_ring_size = MVPP2_MAX_TXD; + port->rx_ring_size = MVPP2_MAX_RXD; + port->dev = dev; + SET_NETDEV_DEV(dev, &pdev->dev); + + err = mvpp2_port_init(port); + if (err < 0) { + dev_err(&pdev->dev, "failed to init port %d\n", id); + goto err_free_stats; + } + mvpp2_port_power_up(port); + + netif_napi_add(dev, &port->napi, mvpp2_poll, NAPI_POLL_WEIGHT); + features = NETIF_F_SG | NETIF_F_IP_CSUM; + dev->features = features | NETIF_F_RXCSUM; + dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO; + dev->vlan_features |= features; + + err = register_netdev(dev); + if (err < 0) { + dev_err(&pdev->dev, "failed to register netdev\n"); + goto err_free_txq_pcpu; + } + netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr); + + /* Increment the first Rx queue number to be used by the next port */ + *next_first_rxq += rxq_number; + priv->port_list[id] = port; + return 0; + +err_free_txq_pcpu: + for (i = 0; i < txq_number; i++) + free_percpu(port->txqs[i]->pcpu); +err_free_stats: + free_percpu(port->stats); +err_free_irq: + irq_dispose_mapping(port->irq); +err_free_netdev: + free_netdev(dev); + return err; +} + +/* Ports removal routine */ +static void mvpp2_port_remove(struct mvpp2_port *port) +{ + int i; + + unregister_netdev(port->dev); + free_percpu(port->stats); + for (i = 0; i < txq_number; i++) + free_percpu(port->txqs[i]->pcpu); + irq_dispose_mapping(port->irq); + free_netdev(port->dev); +} + +/* Initialize decoding windows */ +static void mvpp2_conf_mbus_windows(const struct mbus_dram_target_info *dram, + struct mvpp2 *priv) +{ + u32 win_enable; + int i; + + for (i = 0; i < 6; i++) { + mvpp2_write(priv, MVPP2_WIN_BASE(i), 0); + mvpp2_write(priv, MVPP2_WIN_SIZE(i), 0); + + if (i < 4) + mvpp2_write(priv, MVPP2_WIN_REMAP(i), 0); + } + + win_enable = 0; + + for (i = 0; i < dram->num_cs; i++) { + const struct mbus_dram_window *cs = dram->cs + i; + + mvpp2_write(priv, MVPP2_WIN_BASE(i), + (cs->base & 0xffff0000) | (cs->mbus_attr << 8) | + dram->mbus_dram_target_id); + + mvpp2_write(priv, MVPP2_WIN_SIZE(i), + (cs->size - 1) & 0xffff0000); + + 
win_enable |= (1 << i); + } + + mvpp2_write(priv, MVPP2_BASE_ADDR_ENABLE, win_enable); +} + +/* Initialize Rx FIFO's */ +static void mvpp2_rx_fifo_init(struct mvpp2 *priv) +{ + int port; + + for (port = 0; port < MVPP2_MAX_PORTS; port++) { + mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port), + MVPP2_RX_FIFO_PORT_DATA_SIZE); + mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port), + MVPP2_RX_FIFO_PORT_ATTR_SIZE); + } + + mvpp2_write(priv, MVPP2_RX_MIN_PKT_SIZE_REG, + MVPP2_RX_FIFO_PORT_MIN_PKT); + mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1); +} + +/* Initialize network controller common part HW */ +static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) +{ + const struct mbus_dram_target_info *dram_target_info; + int err, i; + + /* Checks for hardware constraints */ + if (rxq_number % 4 || (rxq_number > MVPP2_MAX_RXQ) || + (txq_number > MVPP2_MAX_TXQ)) { + dev_err(&pdev->dev, "invalid queue size parameter\n"); + return -EINVAL; + } + + /* MBUS windows configuration */ + dram_target_info = mv_mbus_dram_info(); + if (dram_target_info) + mvpp2_conf_mbus_windows(dram_target_info, priv); + + /* Allocate and initialize aggregated TXQs */ + priv->aggr_txqs = devm_kcalloc(&pdev->dev, num_present_cpus(), + sizeof(struct mvpp2_tx_queue), + GFP_KERNEL); + if (!priv->aggr_txqs) + return -ENOMEM; + + for_each_present_cpu(i) { + priv->aggr_txqs[i].id = i; + priv->aggr_txqs[i].size = MVPP2_AGGR_TXQ_SIZE; + err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], + MVPP2_AGGR_TXQ_SIZE, i, priv); + if (err < 0) + return err; + } + + /* Rx Fifo Init */ + mvpp2_rx_fifo_init(priv); + + /* Reset Rx queue group interrupt configuration */ + for (i = 0; i < MVPP2_MAX_PORTS; i++) + mvpp2_write(priv, MVPP2_ISR_RXQ_GROUP_REG(i), rxq_number); + + writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT, + priv->lms_base + MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG); + + /* Allow cache snoop when transmiting packets */ + mvpp2_write(priv, MVPP2_TX_SNOOP_REG, 0x1); + + /* Buffer Manager initialization */ + err = mvpp2_bm_init(pdev, priv); + if (err < 0) + return err; + + /* Parser default initialization */ + err = mvpp2_prs_default_init(pdev, priv); + if (err < 0) + return err; + + /* Classifier default initialization */ + mvpp2_cls_init(priv); + + return 0; +} + +static int mvpp2_probe(struct platform_device *pdev) +{ + struct device_node *dn = pdev->dev.of_node; + struct device_node *port_node; + struct mvpp2 *priv; + struct resource *res; + int port_count, first_rxq; + int err; + + priv = devm_kzalloc(&pdev->dev, sizeof(struct mvpp2), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + priv->lms_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->lms_base)) + return PTR_ERR(priv->lms_base); + + priv->pp_clk = devm_clk_get(&pdev->dev, "pp_clk"); + if (IS_ERR(priv->pp_clk)) + return PTR_ERR(priv->pp_clk); + err = clk_prepare_enable(priv->pp_clk); + if (err < 0) + return err; + + priv->gop_clk = devm_clk_get(&pdev->dev, "gop_clk"); + if (IS_ERR(priv->gop_clk)) { + err = PTR_ERR(priv->gop_clk); + goto err_pp_clk; + } + err = clk_prepare_enable(priv->gop_clk); + if (err < 0) + goto err_pp_clk; + + /* Get system's tclk rate */ + priv->tclk = clk_get_rate(priv->pp_clk); + + /* Initialize network controller */ + err = mvpp2_init(pdev, priv); + if (err < 0) { + dev_err(&pdev->dev, "failed to initialize 
controller\n"); + goto err_gop_clk; + } + + port_count = of_get_available_child_count(dn); + if (port_count == 0) { + dev_err(&pdev->dev, "no ports enabled\n"); + goto err_gop_clk; + } + + priv->port_list = devm_kcalloc(&pdev->dev, port_count, + sizeof(struct mvpp2_port *), + GFP_KERNEL); + if (!priv->port_list) { + err = -ENOMEM; + goto err_gop_clk; + } + + /* Initialize ports */ + first_rxq = 0; + for_each_available_child_of_node(dn, port_node) { + err = mvpp2_port_probe(pdev, port_node, priv, &first_rxq); + if (err < 0) + goto err_gop_clk; + } + + platform_set_drvdata(pdev, priv); + return 0; + +err_gop_clk: + clk_disable_unprepare(priv->gop_clk); +err_pp_clk: + clk_disable_unprepare(priv->pp_clk); + return err; +} + +static int mvpp2_remove(struct platform_device *pdev) +{ + struct mvpp2 *priv = platform_get_drvdata(pdev); + struct device_node *dn = pdev->dev.of_node; + struct device_node *port_node; + int i = 0; + + for_each_available_child_of_node(dn, port_node) { + if (priv->port_list[i]) + mvpp2_port_remove(priv->port_list[i]); + i++; + } + + for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) { + struct mvpp2_bm_pool *bm_pool = &priv->bm_pools[i]; + + mvpp2_bm_pool_destroy(pdev, priv, bm_pool); + } + + for_each_present_cpu(i) { + struct mvpp2_tx_queue *aggr_txq = &priv->aggr_txqs[i]; + + dma_free_coherent(&pdev->dev, + MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, + aggr_txq->descs, + aggr_txq->descs_phys); + } + + clk_disable_unprepare(priv->pp_clk); + clk_disable_unprepare(priv->gop_clk); + + return 0; +} + +static const struct of_device_id mvpp2_match[] = { + { .compatible = "marvell,armada-375-pp2" }, + { } +}; +MODULE_DEVICE_TABLE(of, mvpp2_match); + +static struct platform_driver mvpp2_driver = { + .probe = mvpp2_probe, + .remove = mvpp2_remove, + .driver = { + .name = MVPP2_DRIVER_NAME, + .of_match_table = mvpp2_match, + }, +}; + +module_platform_driver(mvpp2_driver); + +MODULE_DESCRIPTION("Marvell PPv2 Ethernet Driver - www.marvell.com"); +MODULE_AUTHOR("Marcin Wojtas "); +MODULE_LICENSE("GPLv2"); -- cgit v1.2.3 From 685343fc3ba61a1f6eef361b786601123db16c28 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Mon, 14 Jul 2014 16:37:22 +0200 Subject: net: add name_assign_type netdev attribute Based on a patch by David Herrmann. The name_assign_type attribute gives hints where the interface name of a given net-device comes from. These values are currently defined: NET_NAME_ENUM: The ifname is provided by the kernel with an enumerated suffix, typically based on order of discovery. Names may be reused and unpredictable. NET_NAME_PREDICTABLE: The ifname has been assigned by the kernel in a predictable way that is guaranteed to avoid reuse and always be the same for a given device. Examples include statically created devices like the loopback device and names deduced from hardware properties (including being given explicitly by the firmware). Names depending on the order of discovery, or in any other way on the existence of other devices, must not be marked as PREDICTABLE. NET_NAME_USER: The ifname was provided by user-space during net-device setup. NET_NAME_RENAMED: The net-device has been renamed from userspace. Once this type is set, it cannot change again. NET_NAME_UNKNOWN: This is an internal placeholder to indicate that we yet haven't yet categorized the name. It will not be exposed to userspace, rather -EINVAL is returned. The aim of these patches is to improve user-space renaming of interfaces. 
As a general rule, userspace must rename interfaces to guarantee that names
stay the same every time a given piece of hardware appears (at boot, or when
attaching it). However, there are several situations where userspace should
not perform the renaming, and that depends both on the policy of the local
admin and, crucially, on the nature of the current interface name.

If an interface was created in response to a userspace request, and userspace
already provided a name, we most probably want to leave that name alone. The
main instance of this is wifi-P2P devices created over nl80211, which
currently have a long-standing bug where they are getting renamed by udev. We
label such names NET_NAME_USER.

If an interface, unbeknown to us, has already been renamed from userspace, we
most probably want to leave that alone as well. This will typically happen
when third-party plugins (for instance to udev, but the interface is generic
so could be from anywhere) rename the interface without informing udev about
it. A typical situation is when you switch root from an installer or an
initrd to the real system and the new instance of udev does not know what
happened before the switch. These types of problems have caused repeated
issues in the past. To solve this, once an interface has been renamed, its
name is labelled NET_NAME_RENAMED.

In many cases, the kernel is actually able to name interfaces in such a way
that there is no need for userspace to rename them. This is the case when the
enumeration order of devices, or in fact any other (non-parent) device on the
system, cannot influence the name of the interface. Examples include
statically created devices, or any naming schemes based on hardware
properties of the interface. In this case the admin may prefer to use the
kernel-provided names, and to make that possible we label such names
NET_NAME_PREDICTABLE.

We want the kernel to have the possibility of performing predictable
interface naming itself (and exposing to userspace that it has), as the
information necessary for a proper naming scheme for a certain class of
devices may not be exposed to userspace.

The case where renaming is almost certainly desired is when the kernel has
given the interface a name using global device enumeration based on order of
discovery (ethX, wlanY, etc). These naming schemes are labelled NET_NAME_ENUM.

Lastly, a fallback is left as NET_NAME_UNKNOWN, to indicate that a driver has
not yet been ported. This is mostly useful as a transitional measure,
allowing us to label the various naming schemes bit by bit.

v8: minor documentation fixes
v9: move comment to the right commit

Signed-off-by: Tom Gundersen
Reviewed-by: David Herrmann
Reviewed-by: Kay Sievers
Signed-off-by: David S. Miller
---
 Documentation/ABI/testing/sysfs-class-net | 11 +++++++++++
 include/linux/netdevice.h | 2 ++
 include/uapi/linux/netdevice.h | 6 ++++++
 net/core/net-sysfs.c | 20 ++++++++++++++++++++
 4 files changed, 39 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 416c5d59f52e..d322b0581194 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -1,3 +1,14 @@
+What:		/sys/class/net/<iface>/name_assign_type
+Date:		July 2014
+KernelVersion:	3.17
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the name assignment type.
Possible values are: + 1: enumerated by the kernel, possibly in an unpredictable way + 2: predictably named by the kernel + 3: named by userspace + 4: renamed + What: /sys/class/net//addr_assign_type Date: July 2010 KernelVersion: 3.2 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a320db96180..9be34732142f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1381,6 +1381,8 @@ struct net_device { struct kset *queues_kset; #endif + unsigned char name_assign_type; + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; diff --git a/include/uapi/linux/netdevice.h b/include/uapi/linux/netdevice.h index fdfbd1c17065..55818543342d 100644 --- a/include/uapi/linux/netdevice.h +++ b/include/uapi/linux/netdevice.h @@ -37,6 +37,12 @@ #define INIT_NETDEV_GROUP 0 +/* interface name assignment types (sysfs name_assign_type attribute) */ +#define NET_NAME_UNKNOWN 0 /* unknown origin (not exposed to userspace) */ +#define NET_NAME_ENUM 1 /* enumerated by kernel */ +#define NET_NAME_PREDICTABLE 2 /* predictably named by the kernel */ +#define NET_NAME_USER 3 /* provided by user-space */ +#define NET_NAME_RENAMED 4 /* renamed by user-space */ /* Media selection options. */ enum { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1cac29ebb05b..7752f2ad49a5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -112,6 +112,25 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); +static ssize_t format_name_assign_type(const struct net_device *net, char *buf) +{ + return sprintf(buf, fmt_dec, net->name_assign_type); +} + +static ssize_t name_assign_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *net = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (net->name_assign_type != NET_NAME_UNKNOWN) + ret = netdev_show(dev, attr, buf, format_name_assign_type); + + return ret; +} +static DEVICE_ATTR_RO(name_assign_type); + /* use same locking rules as GIFHWADDR ioctl's */ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, + &dev_attr_name_assign_type.attr, &dev_attr_addr_assign_type.attr, &dev_attr_addr_len.attr, &dev_attr_link_mode.attr, -- cgit v1.2.3 From 26c4fdb0528ae7c4be9fbc8a8210f3b410e6b5aa Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 14 Jul 2014 17:55:30 -0400 Subject: net-timestamp: document deprecated syststamp The SO_TIMESTAMPING API defines option SOF_TIMESTAMPING_SYS_HW. This feature is deprecated. It should not be implemented by new device drivers. Existing drivers do not implement it, either -- with one exception. Driver developers are encouraged to expose the NIC hw clock as a PTP HW clock source, instead, and synchronize system time to the HW source. The control flag cannot be removed due to being part of the ABI, nor can the structure scm_timestamping that is returned. Due to the one legacy driver, the internal datapath and structure are not removed. This patch only clearly marks the interface as deprecated. Device drivers should always return a syststamp value of zero. Signed-off-by: Willem de Bruijn ---- We can consider adding a WARN_ON_ONCE in__sock_recv_timestamp if non-zero syststamp is encountered Acked-by: Richard Cochran Signed-off-by: David S. 
Miller --- Documentation/networking/timestamping.txt | 10 ++++++++-- include/linux/skbuff.h | 6 ++++-- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index bc3554124903..8b4ad809df27 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -40,7 +40,7 @@ the set bits correspond to data that is available, then the control message will not be generated: SOF_TIMESTAMPING_SOFTWARE: report systime if available -SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available +SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available (deprecated) SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available It is worth noting that timestamps may be collected for reasons other @@ -94,7 +94,13 @@ not perfect; as a consequence, sorting packets received via different NICs by their hwtimetrans may differ from the order in which they were received. hwtimetrans may be non-monotonic even for the same NIC. Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support -by the network device and will be empty without that support. +by the network device and will be empty without that support. This +field is DEPRECATED. Only one driver computes this value. New device +drivers must leave this zero. Instead, they can expose the hardware +clock device on the NIC directly as a HW PTP clock source, to allow +time conversion in userspace and optionally synchronize system time +with a userspace PTP stack such as linuxptp. For the PTP clock API, +see Documentation/ptp/ptp.txt. SIOCSHWTSTAMP, SIOCGHWTSTAMP: diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 890fb3307dd6..369430340ed9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -211,7 +211,7 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration * since arbitrary point in time - * @syststamp: hwtstamp transformed to system time base + * @syststamp: hwtstamp transformed to system time base (deprecated) * * Software time stamps generated by ktime_get_real() are stored in * skb->tstamp. The relation between the different kinds of time @@ -222,7 +222,9 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) * syststamp/tstamp/"syststamp from other device" comparison is * limited by the accuracy of the transformation into system time * base. This depends on the device driver and its underlying - * hardware. + * hardware. The syststamp implementation is deprecated in favor + * of hwtstamps and hw PTP clock sources exposed directly to + * userspace. * * hwtstamps can only be compared against other hwtstamps from * the same device. 
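Putting the recommendation above into practice, a minimal receive-side sketch (illustrative only; assumes a NIC whose driver supports hardware receive timestamps and an arbitrary UDP port): it requests software and raw hardware timestamps rather than the deprecated SOF_TIMESTAMPING_SYS_HARDWARE, and ignores the middle scm_timestamping slot that would have carried syststamp.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>

int main(void)
{
	int flags = SOF_TIMESTAMPING_RX_HARDWARE |
		    SOF_TIMESTAMPING_RX_SOFTWARE |
		    SOF_TIMESTAMPING_SOFTWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE;	/* no SYS_HARDWARE */
	struct sockaddr_in sin = { .sin_family = AF_INET,
				   .sin_port = htons(5555) };	/* example */
	char data[2048], ctrl[512];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
			      .msg_control = ctrl,
			      .msg_controllen = sizeof(ctrl) };
	struct cmsghdr *cm;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));

	if (recvmsg(fd, &msg, 0) < 0)
		return 1;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		struct scm_timestamping *ts;

		if (cm->cmsg_level != SOL_SOCKET ||
		    cm->cmsg_type != SCM_TIMESTAMPING)
			continue;
		ts = (struct scm_timestamping *)CMSG_DATA(cm);

		/* ts->ts[1] is the deprecated syststamp slot: expect zero */
		printf("sw %lld.%09ld raw hw %lld.%09ld\n",
		       (long long)ts->ts[0].tv_sec, ts->ts[0].tv_nsec,
		       (long long)ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
	}

	close(fd);
	return 0;
}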
-- cgit v1.2.3

From f68c9257201ff5d443bf2e815ce01578835674e4 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko
Date: Wed, 16 Jul 2014 15:13:02 +0300
Subject: net: davinci_mdio: reuse for keystone2 arch

The same MDIO HW block is used by Keystone 2 SoCs as in Davinci SoCs:
- one in the Gigabit Ethernet (GbE) Switch Subsystem
  See http://www.ti.com/lit/ug/sprugv9d/sprugv9d.pdf
- one in the 10 Gigabit Ethernet Subsystem
  See http://www.ti.com/lit/ug/spruhj5/spruhj5.pdf

Hence, reuse the Davinci MDIO driver for Keystone 2 and enable TI networking
for Keystone 2 devices.

Reviewed-by: Santosh Shilimkar
Acked-by: Mugunthan V N
Signed-off-by: Grygorii Strashko
Reviewed-by: Lad, Prabhakar
Signed-off-by: David S. Miller
---
 Documentation/devicetree/bindings/net/davinci-mdio.txt | 8 ++++----
 drivers/net/ethernet/ti/Kconfig | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/davinci-mdio.txt b/Documentation/devicetree/bindings/net/davinci-mdio.txt
index 72efaaf764f7..0369e25aabd2 100644
--- a/Documentation/devicetree/bindings/net/davinci-mdio.txt
+++ b/Documentation/devicetree/bindings/net/davinci-mdio.txt
@@ -1,8 +1,8 @@
-TI SoC Davinci MDIO Controller Device Tree Bindings
+TI SoC Davinci/Keystone2 MDIO Controller Device Tree Bindings
 ---------------------------------------------------

 Required properties:
-- compatible : Should be "ti,davinci_mdio"
+- compatible : Should be "ti,davinci_mdio" or "ti,keystone_mdio"
 - reg : physical base address and size of the davinci mdio registers map
 - bus_freq : Mdio Bus frequency

@@ -19,7 +19,7 @@ file.
 Examples:

 	mdio: davinci_mdio@4A101000 {
-		compatible = "ti,cpsw";
+		compatible = "ti,davinci_mdio";
 		reg = <0x4A101000 0x1000>;
 		bus_freq = <1000000>;
 	};

@@ -27,7 +27,7 @@ Examples:
 (or)

 	mdio: davinci_mdio@4A101000 {
-		compatible = "ti,cpsw";
+		compatible = "ti,davinci_mdio";
 		ti,hwmods = "davinci_mdio";
 		bus_freq = <1000000>;
 	};

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 53150c25a96b..1769700a6070 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -5,7 +5,7 @@ config NET_VENDOR_TI
 	bool "Texas Instruments (TI) devices"
 	default y
-	depends on PCI || EISA || AR7 || (ARM && (ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX))
+	depends on PCI || EISA || AR7 || (ARM && (ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX || ARCH_KEYSTONE))
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y
 	  and read the Ethernet-HOWTO, available from

@@ -32,7 +32,7 @@ config TI_DAVINCI_EMAC
 config TI_DAVINCI_MDIO
 	tristate "TI DaVinci MDIO Support"
-	depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX )
+	depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX || ARCH_KEYSTONE )
 	select PHYLIB
 	---help---
 	  This driver supports TI's DaVinci MDIO module.

-- cgit v1.2.3

From 92abf75033d2677a684c623d60f093b130c4b38f Mon Sep 17 00:00:00 2001
From: Jianhua Xie
Date: Thu, 17 Jul 2014 14:16:26 +0800
Subject: bonding: update bonding.txt for Layer2 hash factors

Document the Layer 2 hash factors, including the packet type ID field.

CC: Jay Vosburgh
CC: Veaceslav Falico
CC: Andy Gospodarek
CC: David S. Miller
CC: Pan Jiafei
Signed-off-by: Jianhua Xie
Signed-off-by: David S.
Miller --- Documentation/networking/bonding.txt | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 9c723ecd0025..eeb5b2e97bed 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -542,10 +542,10 @@ mode XOR policy: Transmit based on the selected transmit hash policy. The default policy is a simple [(source - MAC address XOR'd with destination MAC address) modulo - slave count]. Alternate transmit policies may be - selected via the xmit_hash_policy option, described - below. + MAC address XOR'd with destination MAC address XOR + packet type ID) modulo slave count]. Alternate transmit + policies may be selected via the xmit_hash_policy option, + described below. This mode provides load balancing and fault tolerance. @@ -801,10 +801,11 @@ xmit_hash_policy layer2 - Uses XOR of hardware MAC addresses to generate the - hash. The formula is + Uses XOR of hardware MAC addresses and packet type ID + field to generate the hash. The formula is - (source MAC XOR destination MAC) modulo slave count + hash = source MAC XOR destination MAC XOR packet type ID + slave number = hash modulo slave count This algorithm will place all traffic to a particular network peer on the same slave. @@ -819,7 +820,7 @@ xmit_hash_policy Uses XOR of hardware MAC addresses and IP addresses to generate the hash. The formula is - hash = source MAC XOR destination MAC + hash = source MAC XOR destination MAC XOR packet type ID hash = hash XOR source IP XOR destination IP hash = hash XOR (hash RSHIFT 16) hash = hash XOR (hash RSHIFT 8) @@ -2301,13 +2302,13 @@ broadcast: Like active-backup, there is not much advantage to this bandwidth. Additionally, the linux bonding 802.3ad implementation - distributes traffic by peer (using an XOR of MAC addresses), - so in a "gatewayed" configuration, all outgoing traffic will - generally use the same device. Incoming traffic may also end - up on a single device, but that is dependent upon the - balancing policy of the peer's 8023.ad implementation. In a - "local" configuration, traffic will be distributed across the - devices in the bond. + distributes traffic by peer (using an XOR of MAC addresses + and packet type ID), so in a "gatewayed" configuration, all + outgoing traffic will generally use the same device. Incoming + traffic may also end up on a single device, but that is + dependent upon the balancing policy of the peer's 8023.ad + implementation. In a "local" configuration, traffic will be + distributed across the devices in the bond. Finally, the 802.3ad mode mandates the use of the MII monitor, therefore, the ARP monitor is not available in this mode. -- cgit v1.2.3 From 955164ebb8f8d337ca5b8540ae1f9d4def7f0ad0 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 May 2014 22:35:39 +0200 Subject: NFC: dts: st21nfcb_i2c: Add DTS Documentation Describe the properties used by the st21nfcb NFC controller driver. 
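For orientation, a hedged sketch of the driver-side plumbing that would consume the binding below; the OF/GPIO/IRQ helpers are standard kernel APIs of this era, but the function names, flags and bodies here are illustrative guesses, not the actual st21nfcb driver code:

#include <linux/module.h>
#include <linux/i2c.h>
#include <linux/gpio.h>
#include <linux/of_gpio.h>
#include <linux/interrupt.h>

static irqreturn_t st21nfcb_irq_thread(int irq, void *data)
{
	/* a real driver would read an NCI frame from the chip here */
	return IRQ_HANDLED;
}

static int st21nfcb_i2c_probe(struct i2c_client *client,
			      const struct i2c_device_id *id)
{
	struct device_node *np = client->dev.of_node;
	int reset_gpio, ret;

	/* "reset-gpios" comes straight from the binding; the I2C core has
	 * already translated "interrupts" into client->irq */
	reset_gpio = of_get_named_gpio(np, "reset-gpios", 0);
	if (!gpio_is_valid(reset_gpio))
		return -ENODEV;

	ret = devm_gpio_request_one(&client->dev, reset_gpio,
				    GPIOF_OUT_INIT_HIGH, "nfc_reset");
	if (ret)
		return ret;

	return devm_request_threaded_irq(&client->dev, client->irq,
					 NULL, st21nfcb_irq_thread,
					 IRQF_TRIGGER_LOW | IRQF_ONESHOT,
					 "st21nfcb", client);
}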
Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/st21nfcb.txt | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/nfc/st21nfcb.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt b/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt new file mode 100644 index 000000000000..3b58ae480344 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt @@ -0,0 +1,33 @@ +* STMicroelectronics SAS. ST21NFCB NFC Controller + +Required properties: +- compatible: Should be "st,st21nfcb_i2c". +- clock-frequency: I²C work frequency. +- reg: address on the bus +- interrupt-parent: phandle for the interrupt gpio controller +- interrupts: GPIO interrupt to which the chip is connected +- reset-gpios: Output GPIO pin used to reset the ST21NFCB + +Optional SoC Specific Properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. + +Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2): + +&i2c2 { + + status = "okay"; + + st21nfcb: st21nfcb@8 { + + compatible = "st,st21nfcb_i2c"; + + reg = <0x08>; + clock-frequency = <400000>; + + interrupt-parent = <&gpio5>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; + + reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; + }; +}; -- cgit v1.2.3 From b13d3cbfb8e8a8f53930af67d1ebf05149f32c24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:32 +0200 Subject: inet: frag: move eviction of queues to work queue When the high_thresh limit is reached we try to toss the 'oldest' incomplete fragment queues until memory limits are below the low_thresh value. This happens in softirq/packet processing context. This has two drawbacks: 1) processors might evict a queue that was about to be completed by another cpu, because they will compete wrt. resource usage and resource reclaim. 2) LRU list maintenance is expensive. But when constantly overloaded, even the 'least recently used' element is recent, so removing 'lru' queue first is not 'fairer' than removing any other fragment queue. This moves eviction out of the fast path: When the low threshold is reached, a work queue is scheduled which then iterates over the table and removes the queues that exceed the memory limits of the namespace. It sets a new flag called INET_FRAG_EVICTED on the evicted queues so the proper counters will get incremented when the queue is forcefully expired. When the high threshold is reached, no more fragment queues are created until we're below the limit again. The LRU list is now unused and will be removed in a followup patch. Joint work with Nikolay Aleksandrov. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 4 +- include/net/inet_frag.h | 6 +- net/ipv4/inet_fragment.c | 142 +++++++++++++++++++++++---------- net/ipv4/ip_fragment.c | 3 +- net/ipv6/reassembly.c | 4 +- 5 files changed, 112 insertions(+), 47 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index f35bfe43bf7a..625c8dda4be7 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -104,7 +104,9 @@ ipfrag_high_thresh - INTEGER is reached. 
ipfrag_low_thresh - INTEGER - See ipfrag_high_thresh + Maximum memory used to reassemble IP fragments before the kernel + begins to remove incomplete fragment queues to free up resources. + The kernel still accepts new fragments for defragmentation. ipfrag_time - INTEGER Time in seconds to keep an IP fragment in memory. diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 9fe644d1a26e..e975032ea11b 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -32,6 +32,7 @@ struct inet_frag_queue { int meat; __u8 last_in; /* first/last segment arrived? */ +#define INET_FRAG_EVICTED 8 #define INET_FRAG_COMPLETE 4 #define INET_FRAG_FIRST_IN 2 #define INET_FRAG_LAST_IN 1 @@ -48,7 +49,7 @@ struct inet_frag_queue { * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or * struct frag_queue)) */ -#define INETFRAGS_MAXDEPTH 128 +#define INETFRAGS_MAXDEPTH 128 struct inet_frag_bucket { struct hlist_head chain; @@ -65,6 +66,9 @@ struct inet_frags { int secret_interval; struct timer_list secret_timer; + struct work_struct frags_work; + unsigned int next_bucket; + /* The first call to hashfn is responsible to initialize * rnd. This is best done with net_get_random_once. */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 535636017534..43315ecb9400 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -25,6 +25,9 @@ #include #include +#define INETFRAGS_EVICT_BUCKETS 128 +#define INETFRAGS_EVICT_MAX 512 + /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field @@ -46,8 +49,6 @@ const u8 ip_frag_ecn_table[16] = { }; EXPORT_SYMBOL(ip_frag_ecn_table); -static int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); - static unsigned int inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) { @@ -89,10 +90,92 @@ static void inet_frag_secret_rebuild(unsigned long dummy) mod_timer(&f->secret_timer, now + f->secret_interval); } +static bool inet_fragq_should_evict(const struct inet_frag_queue *q) +{ + return q->net->low_thresh == 0 || + frag_mem_limit(q->net) >= q->net->low_thresh; +} + +static unsigned int +inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) +{ + struct inet_frag_queue *fq; + struct hlist_node *n; + unsigned int evicted = 0; + HLIST_HEAD(expired); + +evict_again: + spin_lock(&hb->chain_lock); + + hlist_for_each_entry_safe(fq, n, &hb->chain, list) { + if (!inet_fragq_should_evict(fq)) + continue; + + if (!del_timer(&fq->timer)) { + /* q expiring right now thus increment its refcount so + * it won't be freed under us and wait until the timer + * has finished executing then destroy it + */ + atomic_inc(&fq->refcnt); + spin_unlock(&hb->chain_lock); + del_timer_sync(&fq->timer); + WARN_ON(atomic_read(&fq->refcnt) != 1); + inet_frag_put(fq, f); + goto evict_again; + } + + /* suppress xmit of (icmp) error packet */ + fq->last_in &= ~INET_FRAG_FIRST_IN; + fq->last_in |= INET_FRAG_EVICTED; + hlist_del(&fq->list); + hlist_add_head(&fq->list, &expired); + ++evicted; + } + + spin_unlock(&hb->chain_lock); + + hlist_for_each_entry_safe(fq, n, &expired, list) + f->frag_expire((unsigned long) fq); + + return evicted; +} + +static void inet_frag_worker(struct work_struct *work) +{ + unsigned int budget = INETFRAGS_EVICT_BUCKETS; + unsigned int i, evicted = 0; + struct inet_frags *f; + + f = container_of(work, struct inet_frags, frags_work); + + 
BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); + + read_lock_bh(&f->lock); + + for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { + evicted += inet_evict_bucket(f, &f->hash[i]); + i = (i + 1) & (INETFRAGS_HASHSZ - 1); + if (evicted > INETFRAGS_EVICT_MAX) + break; + } + + f->next_bucket = i; + + read_unlock_bh(&f->lock); +} + +static void inet_frag_schedule_worker(struct inet_frags *f) +{ + if (unlikely(!work_pending(&f->frags_work))) + schedule_work(&f->frags_work); +} + void inet_frags_init(struct inet_frags *f) { int i; + INIT_WORK(&f->frags_work, inet_frag_worker); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_bucket *hb = &f->hash[i]; @@ -120,16 +203,22 @@ EXPORT_SYMBOL(inet_frags_init_net); void inet_frags_fini(struct inet_frags *f) { del_timer(&f->secret_timer); + cancel_work_sync(&f->frags_work); } EXPORT_SYMBOL(inet_frags_fini); void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) { + int i; + nf->low_thresh = 0; - local_bh_disable(); - inet_frag_evictor(nf, f, true); - local_bh_enable(); + read_lock_bh(&f->lock); + + for (i = 0; i < INETFRAGS_HASHSZ ; i++) + inet_evict_bucket(f, &f->hash[i]); + + read_unlock_bh(&f->lock); percpu_counter_destroy(&nf->mem); } @@ -205,41 +294,6 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, } EXPORT_SYMBOL(inet_frag_destroy); -static int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) -{ - struct inet_frag_queue *q; - int work, evicted = 0; - - work = frag_mem_limit(nf) - nf->low_thresh; - while (work > 0 || force) { - spin_lock(&nf->lru_lock); - - if (list_empty(&nf->lru_list)) { - spin_unlock(&nf->lru_lock); - break; - } - - q = list_first_entry(&nf->lru_list, - struct inet_frag_queue, lru_list); - atomic_inc(&q->refcnt); - /* Remove q from list to avoid several CPUs grabbing it */ - list_del_init(&q->lru_list); - - spin_unlock(&nf->lru_lock); - - spin_lock(&q->lock); - if (!(q->last_in & INET_FRAG_COMPLETE)) - inet_frag_kill(q, f); - spin_unlock(&q->lock); - - if (atomic_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f, &work); - evicted++; - } - - return evicted; -} - static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, void *arg) @@ -292,8 +346,10 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, { struct inet_frag_queue *q; - if (frag_mem_limit(nf) > nf->high_thresh) + if (frag_mem_limit(nf) > nf->high_thresh) { + inet_frag_schedule_worker(f); return NULL; + } q = kzalloc(f->qsize, GFP_ATOMIC); if (q == NULL) @@ -331,8 +387,8 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; int depth = 0; - if (frag_mem_limit(nf) > nf->high_thresh) - inet_frag_evictor(nf, f, false); + if (frag_mem_limit(nf) > nf->low_thresh) + inet_frag_schedule_worker(f); hash &= (INETFRAGS_HASHSZ - 1); hb = &f->hash[hash]; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 54988672d00d..54bd170c5eb4 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -195,7 +195,8 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + if (!(qp->q.last_in & INET_FRAG_EVICTED)) + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 97acbc490d9e..b3924b10dff3 100644 --- 
a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -141,7 +141,9 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, if (!dev) goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + if (!(fq->q.last_in & INET_FRAG_EVICTED)) + IP6_INC_STATS_BH(net, __in6_dev_get(dev), + IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); /* Don't send error if the first segment did not arrive. */ -- cgit v1.2.3 From e3a57d18b06179d68fcf7a0a06ad844493c65e06 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:35 +0200 Subject: inet: frag: remove periodic secret rebuild timer Merge the functionality into the eviction workqueue. Instead of rebuilding every n seconds, take advantage of the upper hash chain length limit. If we hit it, mark the table for rebuild and schedule the workqueue. To prevent frequent rebuilds when we're completely overloaded, don't rebuild more than once every 5 seconds. The ipfrag_secret_interval sysctl is now obsolete and has been marked as deprecated; it can still be changed so scripts won't break, but it no longer has any effect. A comment is left above each unused secret_timer variable to avoid confusion. Joint work with Nikolay Aleksandrov. Signed-off-by: Florian Westphal Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 10 -------- include/net/inet_frag.h | 4 +-- net/ieee802154/reassembly.c | 5 ++-- net/ipv4/inet_fragment.c | 43 ++++++++++++++++++++++----------- net/ipv4/ip_fragment.c | 5 ++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 - net/ipv6/reassembly.c | 5 ++-- 7 files changed, 40 insertions(+), 33 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 625c8dda4be7..e8c304e37831 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -111,11 +111,6 @@ ipfrag_low_thresh - INTEGER ipfrag_time - INTEGER Time in seconds to keep an IP fragment in memory. -ipfrag_secret_interval - INTEGER - Regeneration interval (in seconds) of the hash secret (or lifetime - for the hash secret) for IP fragments. - Default: 600 - ipfrag_max_dist - INTEGER ipfrag_max_dist is a non-negative integer value which defines the maximum "disorder" which is allowed among fragments which share a @@ -1164,11 +1159,6 @@ ip6frag_low_thresh - INTEGER ip6frag_time - INTEGER Time in seconds to keep an IPv6 fragment in memory. -ip6frag_secret_interval - INTEGER - Regeneration interval (in seconds) of the hash secret (or lifetime - for the hash secret) for IPv6 fragments. - Default: 600 - conf/default/*: Change the interface-specific default settings. diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 90d21ea62c59..d9cc5bb64854 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -58,11 +58,11 @@ struct inet_frags { * Its primarily a rebuild protection rwlock. */ rwlock_t lock ____cacheline_aligned_in_smp; - int secret_interval; - struct timer_list secret_timer; struct work_struct frags_work; unsigned int next_bucket; + unsigned long last_rebuild_jiffies; + bool rebuild; /* The first call to hashfn is responsible to initialize * rnd. This is best done with net_get_random_once.
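The throttle described above reduces to a single time_after() test against a five-second window. As a condensed, standalone sketch of the check that the inet_fragment.c hunk below adds as inet_frag_may_rebuild() (a restatement for illustration, not the full patch context):

#include <linux/jiffies.h>
#include <linux/types.h>

/* Condensed from the patch: allow a hash rebuild at most once
 * every 5 * HZ jiffies, i.e. once every five seconds. */
static bool may_rebuild(unsigned long last_rebuild_jiffies)
{
        return time_after(jiffies, last_rebuild_jiffies + 5 * HZ);
}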
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index b4bc7a50eccf..20d219682d84 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -419,10 +419,12 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int lowpan_frags_secret_interval_unused; static struct ctl_table lowpan_frags_ctl_table[] = { { .procname = "6lowpanfrag_secret_interval", - .data = &lowpan_frags.secret_interval, + .data = &lowpan_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -562,7 +564,6 @@ int __init lowpan_net_frag_init(void) lowpan_frags.qsize = sizeof(struct frag_queue); lowpan_frags.match = lowpan_frag_match; lowpan_frags.frag_expire = lowpan_frag_expire; - lowpan_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&lowpan_frags); return ret; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 198a5ed7a815..58d4c38534f6 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -28,6 +28,9 @@ #define INETFRAGS_EVICT_BUCKETS 128 #define INETFRAGS_EVICT_MAX 512 +/* don't rebuild inetfrag table with new secret more often than this */ +#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ) + /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field @@ -55,16 +58,24 @@ inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) return f->hashfn(q) & (INETFRAGS_HASHSZ - 1); } -static void inet_frag_secret_rebuild(unsigned long dummy) +static bool inet_frag_may_rebuild(struct inet_frags *f) +{ + return time_after(jiffies, + f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL); +} + +static void inet_frag_secret_rebuild(struct inet_frags *f) { - struct inet_frags *f = (struct inet_frags *)dummy; - unsigned long now = jiffies; int i; /* Per bucket lock NOT needed here, due to write lock protection */ - write_lock(&f->lock); + write_lock_bh(&f->lock); + + if (!inet_frag_may_rebuild(f)) + goto out; get_random_bytes(&f->rnd, sizeof(u32)); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_bucket *hb; struct inet_frag_queue *q; @@ -85,9 +96,11 @@ static void inet_frag_secret_rebuild(unsigned long dummy) } } } - write_unlock(&f->lock); - mod_timer(&f->secret_timer, now + f->secret_interval); + f->rebuild = false; + f->last_rebuild_jiffies = jiffies; +out: + write_unlock_bh(&f->lock); } static bool inet_fragq_should_evict(const struct inet_frag_queue *q) @@ -162,6 +175,8 @@ static void inet_frag_worker(struct work_struct *work) f->next_bucket = i; read_unlock_bh(&f->lock); + if (f->rebuild && inet_frag_may_rebuild(f)) + inet_frag_secret_rebuild(f); } static void inet_frag_schedule_worker(struct inet_frags *f) @@ -183,11 +198,7 @@ void inet_frags_init(struct inet_frags *f) INIT_HLIST_HEAD(&hb->chain); } rwlock_init(&f->lock); - - setup_timer(&f->secret_timer, inet_frag_secret_rebuild, - (unsigned long)f); - f->secret_timer.expires = jiffies + f->secret_interval; - add_timer(&f->secret_timer); + f->last_rebuild_jiffies = 0; } EXPORT_SYMBOL(inet_frags_init); @@ -199,7 +210,6 @@ EXPORT_SYMBOL(inet_frags_init_net); void inet_frags_fini(struct inet_frags *f) { - del_timer(&f->secret_timer); cancel_work_sync(&f->frags_work); } EXPORT_SYMBOL(inet_frags_fini); @@ -399,8 +409,13 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, if (depth <= INETFRAGS_MAXDEPTH) return 
inet_frag_create(nf, f, key); - else - return ERR_PTR(-ENOBUFS); + + if (inet_frag_may_rebuild(f)) { + f->rebuild = true; + inet_frag_schedule_worker(f); + } + + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8fbeee495037..44e591a7e03f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -720,10 +720,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int ip4_frags_secret_interval_unused; static struct ctl_table ip4_frags_ctl_table[] = { { .procname = "ipfrag_secret_interval", - .data = &ip4_frags.secret_interval, + .data = &ip4_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -853,6 +855,5 @@ void __init ipfrag_init(void) ip4_frags.qsize = sizeof(struct ipq); ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; - ip4_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&ip4_frags); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index fb0f72a0ff31..3b3ef9774cc2 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -669,7 +669,6 @@ int nf_ct_frag6_init(void) nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; - nf_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&nf_frags); ret = register_pernet_subsys(&nf_ct_net_ops); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index af85551682c2..987fea46b915 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -604,10 +604,12 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int ip6_frags_secret_interval_unused; static struct ctl_table ip6_frags_ctl_table[] = { { .procname = "ip6frag_secret_interval", - .data = &ip6_frags.secret_interval, + .data = &ip6_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -737,7 +739,6 @@ int __init ipv6_frag_init(void) ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; - ip6_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&ip6_frags); out: return ret; -- cgit v1.2.3 From 1bab4c75075b84675b96992ac47580a57c26958d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 24 Jul 2014 16:50:37 +0200 Subject: inet: frag: set limits and make init_net's high_thresh limit global This patch makes init_net's high_thresh limit the maximum for all namespaces, thus introducing a global memory limit threshold equal to the sum of the individual high_thresh limits which are capped. It also introduces some sane minimums for low_thresh as it shouldn't be able to drop below 0 (or > high_thresh in the unsigned case), and overall low_thresh should not ever be above high_thresh, so we make the following relations for a namespace:
init_net:
  high_thresh - max(not capped), min(init_net low_thresh)
  low_thresh  - max(init_net high_thresh), min(0)
all other namespaces:
  high_thresh - max(init_net high_thresh), min(namespace's low_thresh)
  low_thresh  - max(namespace's high_thresh), min(0)
The major issue with having low_thresh > high_thresh is that we'll schedule eviction but never evict anything and thus rely only on the timers. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S.
Miller --- Documentation/networking/ip-sysctl.txt | 3 ++- net/ieee802154/reassembly.c | 12 ++++++++++-- net/ipv4/ip_fragment.c | 10 ++++++++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 ++++++++++-- net/ipv6/reassembly.c | 12 ++++++++++-- 5 files changed, 40 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e8c304e37831..29a93518bf18 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -101,7 +101,8 @@ ipfrag_high_thresh - INTEGER Maximum memory used to reassemble IP fragments. When ipfrag_high_thresh bytes of memory is allocated for this purpose, the fragment handler will toss packets until ipfrag_low_thresh - is reached. + is reached. This also serves as a maximum limit to namespaces + different from the initial one. ipfrag_low_thresh - INTEGER Maximum memory used to reassemble IP fragments before the kernel diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index 8da635d92a58..f13d4f32e207 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -386,20 +386,25 @@ err: EXPORT_SYMBOL(lowpan_frag_rcv); #ifdef CONFIG_SYSCTL +static int zero; + static struct ctl_table lowpan_frags_ns_ctl_table[] = { { .procname = "6lowpanfrag_high_thresh", .data = &init_net.ieee802154_lowpan.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh }, { .procname = "6lowpanfrag_low_thresh", .data = &init_net.ieee802154_lowpan.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh }, { .procname = "6lowpanfrag_time", @@ -446,7 +451,10 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) goto err_alloc; table[0].data = &ieee802154_lowpan->frags.high_thresh; + table[0].extra1 = &ieee802154_lowpan->frags.low_thresh; + table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh; table[1].data = &ieee802154_lowpan->frags.low_thresh; + table[1].extra2 = &ieee802154_lowpan->frags.high_thresh; table[2].data = &ieee802154_lowpan->frags.timeout; table[3].data = &ieee802154_lowpan->max_dsize; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index ccee68dffd6e..634fc31aa243 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -700,14 +700,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.ipv4.frags.low_thresh }, { .procname = "ipfrag_low_thresh", .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.ipv4.frags.high_thresh }, { .procname = "ipfrag_time", @@ -752,7 +755,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; + table[0].extra1 = &net->ipv4.frags.low_thresh; + table[0].extra2 = &init_net.ipv4.frags.high_thresh; table[1].data = &net->ipv4.frags.low_thresh; + table[1].extra2 = &net->ipv4.frags.high_thresh; table[2].data = &net->ipv4.frags.timeout; /* Don't export sysctls to unprivileged users */ diff 
--git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 4d9da1e35f8c..3d4bccf6d67d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -63,6 +63,8 @@ struct nf_ct_frag6_skb_cb static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL +static int zero; + static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", @@ -76,14 +78,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.nf_frag.frags.high_thresh }, { .procname = "nf_conntrack_frag6_high_thresh", .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.nf_frag.frags.low_thresh }, { } }; @@ -102,7 +107,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net) table[0].data = &net->nf_frag.frags.timeout; table[1].data = &net->nf_frag.frags.low_thresh; + table[1].extra2 = &net->nf_frag.frags.high_thresh; table[2].data = &net->nf_frag.frags.high_thresh; + table[2].extra1 = &net->nf_frag.frags.low_thresh; + table[2].extra2 = &init_net.nf_frag.frags.high_thresh; } hdr = register_net_sysctl(net, "net/netfilter", table); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 57a9707b2032..f1709c4a289a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -578,20 +578,25 @@ static const struct inet6_protocol frag_protocol = }; #ifdef CONFIG_SYSCTL +static int zero; + static struct ctl_table ip6_frags_ns_ctl_table[] = { { .procname = "ip6frag_high_thresh", .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.ipv6.frags.low_thresh }, { .procname = "ip6frag_low_thresh", .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.ipv6.frags.high_thresh }, { .procname = "ip6frag_time", @@ -628,7 +633,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; + table[0].extra1 = &net->ipv6.frags.low_thresh; + table[0].extra2 = &init_net.ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; + table[1].extra2 = &net->ipv6.frags.high_thresh; table[2].data = &net->ipv6.frags.timeout; /* Don't export sysctls to unprivileged users */ -- cgit v1.2.3 From 1217dda7cb2e93dc236cba4544239c586f391099 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 29 Jun 2014 16:16:58 +0200 Subject: dt: bindings: add bindings for Broadcom bcm43xx sdio devices The Broadcom bcm43xx sdio devices are fullmac devices that may be integrated into ARM platforms. Currently, the brcmfmac driver for these devices supports the use of platform data. This patch specifies the bindings that allow this platform data to be expressed in the devicetree.
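To make the optional properties concrete, a consumer might read them roughly as follows (a hypothetical sketch with invented names, not the actual brcmfmac code):

#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/printk.h>

/* Hypothetical fragment: honour the binding's optional properties. */
static void bcm43xx_parse_dt(struct device_node *np)
{
        u32 drive_strength = 6; /* binding default, in mA */
        unsigned int oob_irq;

        of_property_read_u32(np, "brcm,drive-strength", &drive_strength);

        /* a mapping of 0 means no out-of-band "host-wake" interrupt was
         * given; fall back to in-band SDIO interrupts in that case */
        oob_irq = irq_of_parse_and_map(np, 0);
        if (!oob_irq)
                pr_info("bcm43xx: using in-band SDIO interrupts\n");
}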
Reviewed-by: Hante Meuleman Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Daniel (Deognyoun) Kim Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel [hdegoede@redhat.com: drop clk / reg_on gpio handling, as there is no consensus on how to handle this yet] [hdegoede@redhat.com: move from bindings/staging to bindings] Signed-off-by: Hans de Goede Signed-off-by: John W. Linville --- .../bindings/net/wireless/brcm,bcm43xx-fmac.txt | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt b/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt new file mode 100644 index 000000000000..5dbf169cd81c --- /dev/null +++ b/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt @@ -0,0 +1,41 @@ +Broadcom BCM43xx Fullmac wireless SDIO devices + +This node provides properties for controlling the Broadcom wireless device. The +node is expected to be specified as a child node to the SDIO controller that +connects the device to the system. + +Required properties: + + - compatible : Should be "brcm,bcm4329-fmac". + +Optional properties: + - brcm,drive-strength : drive strength used for SDIO pins on device in mA + (default = 6). + - interrupt-parent : the phandle for the interrupt controller to which the + device interrupts are connected. + - interrupts : specifies attributes for the out-of-band interrupt (host-wake). + When not specified the device will use in-band SDIO interrupts. + - interrupt-names : name of the out-of-band interrupt, which must be set + to "host-wake". + +Example: + +mmc3: mmc@01c12000 { + #address-cells = <1>; + #size-cells = <0>; + + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins_a>; + vmmc-supply = <®_vmmc3>; + bus-width = <4>; + non-removable; + status = "okay"; + + brcmf: bcrmf@1 { + reg = <1>; + compatible = "brcm,bcm4329-fmac"; + interrupt-parent = <&pio>; + interrupts = <10 8>; /* PH10 / EINT10 */ + interrupt-names = "host-wake"; + }; +}; -- cgit v1.2.3 From 68a360e82e55c9b35097e7be7f7991d8f401032f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 25 Jul 2014 18:01:31 -0400 Subject: packet: remove deprecated syststamp timestamp No device driver will ever return an skb_shared_info structure with syststamp non-zero, so remove the branch that tests for this and optionally marks the packet timestamp as TP_STATUS_TS_SYS_HARDWARE. Do not remove the definition TP_STATUS_TS_SYS_HARDWARE, as processes may refer to it. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 18 ++++++------------ include/uapi/linux/if_packet.h | 2 +- net/packet/af_packet.c | 12 ++++-------- 3 files changed, 11 insertions(+), 21 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 38112d512f47..a6d7cb91069e 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -1008,14 +1008,9 @@ hardware timestamps to be used. Note: you may need to enable the generation of hardware timestamps with SIOCSHWTSTAMP (see related information from Documentation/networking/timestamping.txt). -PACKET_TIMESTAMP accepts the same integer bit field as -SO_TIMESTAMPING. 
However, only the SOF_TIMESTAMPING_SYS_HARDWARE -and SOF_TIMESTAMPING_RAW_HARDWARE values are recognized by -PACKET_TIMESTAMP. SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over -SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set. - - int req = 0; - req |= SOF_TIMESTAMPING_SYS_HARDWARE; +PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING: + + int req = SOF_TIMESTAMPING_RAW_HARDWARE; setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req)) For the mmap(2)ed ring buffers, such timestamps are stored in the @@ -1023,14 +1018,13 @@ tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine what kind of timestamp has been reported, the tp_status field is binary |'ed with the following possible bits ... - TP_STATUS_TS_SYS_HARDWARE TP_STATUS_TS_RAW_HARDWARE TP_STATUS_TS_SOFTWARE ... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the -RX_RING, if none of those 3 are set (i.e. PACKET_TIMESTAMP is not set), -then this means that a software fallback was invoked *within* PF_PACKET's -processing code (less precise). +RX_RING, if neither is set (i.e. PACKET_TIMESTAMP is not set), then a +software fallback was invoked *within* PF_PACKET's processing code (less +precise). Getting timestamps for the TX_RING works as follows: i) fill the ring frames, ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index bac27fa05f5b..da2d668b8cf1 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -108,7 +108,7 @@ struct tpacket_auxdata { /* Rx and Tx ring - header status */ #define TP_STATUS_TS_SOFTWARE (1 << 29) -#define TP_STATUS_TS_SYS_HARDWARE (1 << 30) +#define TP_STATUS_TS_SYS_HARDWARE (1 << 30) /* deprecated, never set */ #define TP_STATUS_TS_RAW_HARDWARE (1 << 31) /* Rx ring - feature request bits */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 614ca91f785a..8d9f8042705a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -441,14 +441,10 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); - if (shhwtstamps) { - if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) - return TP_STATUS_TS_SYS_HARDWARE; - if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) - return TP_STATUS_TS_RAW_HARDWARE; - } + if (shhwtstamps && + (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) + return TP_STATUS_TS_RAW_HARDWARE; if (ktime_to_timespec_cond(skb->tstamp, ts)) return TP_STATUS_TS_SOFTWARE; -- cgit v1.2.3 From 4d276eb6a478307a28ae843836c455bf04b37a3c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 25 Jul 2014 18:01:32 -0400 Subject: net: remove deprecated syststamp timestamp The SO_TIMESTAMPING API defines three types of timestamps: software, hardware in raw format (hwtstamp) and hardware converted to system format (syststamp). The last has been deprecated in favor of combining hwtstamp with a PTP clock driver. There are no active users in the kernel. The option was device driver dependent. If set, but without hardware support, the correct behavior is to return zero in the relevant field in the SCM_TIMESTAMPING ancillary message. Without device drivers implementing the option, this field is effectively always zero. 
Remove the internal plumbing to dissuade new drivers from implementing the feature. Keep the SOF_TIMESTAMPING_SYS_HARDWARE flag, however, to avoid breaking existing applications that request the timestamp. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 12 ++---------- Documentation/networking/timestamping/timestamping.c | 7 +------ include/linux/skbuff.h | 14 +-------------- include/net/sock.h | 11 +++-------- net/core/sock.c | 4 ---- net/socket.c | 12 ++++-------- 6 files changed, 11 insertions(+), 49 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 8b4ad809df27..897f942b976b 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -88,15 +88,8 @@ hwtimeraw is the original hardware time stamp. Filled in if SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its relation to system time should be made. -hwtimetrans is the hardware time stamp transformed so that it -corresponds as good as possible to system time. This correlation is -not perfect; as a consequence, sorting packets received via different -NICs by their hwtimetrans may differ from the order in which they were -received. hwtimetrans may be non-monotonic even for the same NIC. -Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support -by the network device and will be empty without that support. This -field is DEPRECATED. Only one driver computes this value. New device -drivers must leave this zero. Instead, they can expose the hardware +hwtimetrans is always zero. This field is deprecated. It used to hold +hw timestamps converted to system time. Instead, expose the hardware clock device on the NIC directly as a HW PTP clock source, to allow time conversion in userspace and optionally synchronize system time with a userspace PTP stack such as linuxptp.
For the PTP clock API, @@ -191,7 +184,6 @@ struct skb_shared_hwtstamps { * since arbitrary point in time */ ktime_t hwtstamp; - ktime_t syststamp; /* hwtstamp transformed to system time base */ }; Time stamps for outgoing packets are to be generated as follows: diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c index 8ba82bfe6a33..5cdfd743447b 100644 --- a/Documentation/networking/timestamping/timestamping.c +++ b/Documentation/networking/timestamping/timestamping.c @@ -76,7 +76,6 @@ static void usage(const char *error) " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" - " SOF_TIMESTAMPING_SYS_HARDWARE - request reporting of transformed HW time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" " SIOCGSTAMP - check last socket time stamp\n" " SIOCGSTAMPNS - more accurate socket time stamp\n"); @@ -202,9 +201,7 @@ static void printpacket(struct msghdr *msg, int res, (long)stamp->tv_sec, (long)stamp->tv_nsec); stamp++; - printf("HW transformed %ld.%09ld ", - (long)stamp->tv_sec, - (long)stamp->tv_nsec); + /* skip deprecated HW transformed */ stamp++; printf("HW raw %ld.%09ld", (long)stamp->tv_sec, @@ -361,8 +358,6 @@ int main(int argc, char **argv) so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SYS_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_SYS_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; else diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b613557132b9..281deced7469 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -210,20 +210,9 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration * since arbitrary point in time - * @syststamp: hwtstamp transformed to system time base (deprecated) * * Software time stamps generated by ktime_get_real() are stored in - * skb->tstamp. The relation between the different kinds of time - * stamps is as follows: - * - * syststamp and tstamp can be compared against each other in - * arbitrary combinations. The accuracy of a - * syststamp/tstamp/"syststamp from other device" comparison is - * limited by the accuracy of the transformation into system time - * base. This depends on the device driver and its underlying - * hardware. The syststamp implementation is deprecated in favor - * of hwtstamps and hw PTP clock sources exposed directly to - * userspace. + * skb->tstamp. * * hwtstamps can only be compared against other hwtstamps from * the same device. 
@@ -233,7 +222,6 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) */ struct skb_shared_hwtstamps { ktime_t hwtstamp; - ktime_t syststamp; }; /* Definitions for tx_flags in struct skb_shared_info */ diff --git a/include/net/sock.h b/include/net/sock.h index 720773304a85..b91c8868ab8d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -707,7 +707,6 @@ enum sock_flags { SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */ SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */ SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ - SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ SOCK_FASYNC, /* fasync() active */ SOCK_RXQ_OVFL, SOCK_ZEROCOPY, /* buffers from userspace */ @@ -2166,16 +2165,13 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) * - software time stamp available and wanted * (SOCK_TIMESTAMPING_SOFTWARE) * - hardware time stamps available and wanted - * (SOCK_TIMESTAMPING_SYS_HARDWARE or - * SOCK_TIMESTAMPING_RAW_HARDWARE) + * SOCK_TIMESTAMPING_RAW_HARDWARE */ if (sock_flag(sk, SOCK_RCVTSTAMP) || sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) || (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) || (hwtstamps->hwtstamp.tv64 && - sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) || - (hwtstamps->syststamp.tv64 && - sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))) + sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else sk->sk_stamp = kt; @@ -2193,8 +2189,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ (1UL << SOCK_TIMESTAMPING_SOFTWARE) | \ - (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \ - (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE)) + (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)) if (sk->sk_flags & FLAGS_TS_OR_DROPS) __sock_recv_ts_and_drops(msg, sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index ca9b65199d28..134291d73fcd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -862,8 +862,6 @@ set_rcvbuf: (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, val & SOF_TIMESTAMPING_SOFTWARE); - sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, - val & SOF_TIMESTAMPING_SYS_HARDWARE); sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE, val & SOF_TIMESTAMPING_RAW_HARDWARE); break; @@ -1102,8 +1100,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val |= SOF_TIMESTAMPING_RX_SOFTWARE; if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) v.val |= SOF_TIMESTAMPING_SOFTWARE; - if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)) - v.val |= SOF_TIMESTAMPING_SYS_HARDWARE; if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) v.val |= SOF_TIMESTAMPING_RAW_HARDWARE; break; diff --git a/net/socket.c b/net/socket.c index abf56b2a14f9..d8222c025061 100644 --- a/net/socket.c +++ b/net/socket.c @@ -725,14 +725,10 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && ktime_to_timespec_cond(skb->tstamp, ts + 0)) empty = 0; - if (shhwtstamps) { - if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1)) - empty = 0; - if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) - empty = 0; - } + if (shhwtstamps && + sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) + 
empty = 0; if (!empty) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(ts), &ts); -- cgit v1.2.3 From 1a2a909bad464fc9ebdb03482049336fe2e7cbc8 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 29 Jul 2014 08:57:14 -0500 Subject: amd-xgbe: Add dma-coherent to device bindings documentation An earlier patch added support for the "dma-coherent" device property. This patch adds this optional property to the amd-xgbe device bindings documentation. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/amd-xgbe.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt index ea0c7908a3b8..f6db1ba87a3f 100644 --- a/Documentation/devicetree/bindings/net/amd-xgbe.txt +++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt @@ -18,6 +18,7 @@ Required properties: Optional properties: - mac-address: mac address to be assigned to the device. Can be overridden by UEFI. +- dma-coherent: Present if dma operations are coherent Example: xgbe@e0700000 { -- cgit v1.2.3 From 23e4eef7cf56b5e36e76af9078f0012826c86b2f Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 29 Jul 2014 08:57:19 -0500 Subject: amd-xgbe: Add hardware timestamp support This patch adds support for Tx and Rx hardware timestamping. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/amd-xgbe.txt | 16 +- drivers/net/ethernet/amd/Kconfig | 1 + drivers/net/ethernet/amd/xgbe/Makefile | 3 +- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 71 ++++ drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 9 + drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 143 +++++++- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 372 ++++++++++++++++++--- drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 35 ++ drivers/net/ethernet/amd/xgbe/xgbe-main.c | 21 +- drivers/net/ethernet/amd/xgbe/xgbe-ptp.c | 285 ++++++++++++++++ drivers/net/ethernet/amd/xgbe/xgbe.h | 56 +++- 11 files changed, 954 insertions(+), 58 deletions(-) create mode 100644 drivers/net/ethernet/amd/xgbe/xgbe-ptp.c (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt index f6db1ba87a3f..41354f730beb 100644 --- a/Documentation/devicetree/bindings/net/amd-xgbe.txt +++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt @@ -8,10 +8,14 @@ Required properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - interrupts: Should contain the amd-xgbe interrupt -- clocks: Should be the DMA clock for the amd-xgbe device (used for - calculating the correct Rx interrupt watchdog timer value on a DMA - channel for coalescing) -- clock-names: Should be the name of the DMA clock, "dma_clk" +- clocks: + - DMA clock for the amd-xgbe device (used for calculating the + correct Rx interrupt watchdog timer value on a DMA channel + for coalescing) + - PTP clock for the amd-xgbe device +- clock-names: Should be the names of the clocks + - "dma_clk" for the DMA clock + - "ptp_clk" for the PTP clock - phy-handle: See ethernet.txt file in the same directory - phy-mode: See ethernet.txt file in the same directory @@ -27,8 +31,8 @@ Example: <0 0xe0780000 0 0x80000>; interrupt-parent = <&gic>; interrupts = <0 325 4>; - clocks = <&xgbe_clk>; - clock-names = "dma_clk"; + clocks = <&xgbe_dma_clk>, <&xgbe_ptp_clk>; + clock-names = "dma_clk", "ptp_clk"; 
phy-handle = <&phy>; phy-mode = "xgmii"; mac-address = [ 02 a1 a2 a3 a4 a5 ]; diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 6e314dbba805..98ef8ff8fa08 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -184,6 +184,7 @@ config AMD_XGBE select AMD_XGBE_PHY select BITREVERSE select CRC32 + select PTP_1588_CLOCK ---help--- This driver supports the AMD 10GbE Ethernet device found on an AMD SoC. diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile index 26cf9af1642f..66c49b43e5f2 100644 --- a/drivers/net/ethernet/amd/xgbe/Makefile +++ b/drivers/net/ethernet/amd/xgbe/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_AMD_XGBE) += amd-xgbe.o amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \ - xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o + xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o \ + xgbe-ptp.o amd-xgbe-$(CONFIG_DEBUG_FS) += xgbe-debugfs.o diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 7ec80ac7043f..646702cd75fb 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -307,6 +307,16 @@ #define MAC_MACA0LR 0x0304 #define MAC_MACA1HR 0x0308 #define MAC_MACA1LR 0x030c +#define MAC_TSCR 0x0d00 +#define MAC_SSIR 0x0d04 +#define MAC_STSR 0x0d08 +#define MAC_STNR 0x0d0c +#define MAC_STSUR 0x0d10 +#define MAC_STNUR 0x0d14 +#define MAC_TSAR 0x0d18 +#define MAC_TSSR 0x0d20 +#define MAC_TXSNR 0x0d30 +#define MAC_TXSSR 0x0d34 #define MAC_QTFCR_INC 4 #define MAC_MACA_INC 4 @@ -373,12 +383,16 @@ #define MAC_HWF2R_TXCHCNT_WIDTH 4 #define MAC_HWF2R_TXQCNT_INDEX 6 #define MAC_HWF2R_TXQCNT_WIDTH 4 +#define MAC_IER_TSIE_INDEX 12 +#define MAC_IER_TSIE_WIDTH 1 #define MAC_ISR_MMCRXIS_INDEX 9 #define MAC_ISR_MMCRXIS_WIDTH 1 #define MAC_ISR_MMCTXIS_INDEX 10 #define MAC_ISR_MMCTXIS_WIDTH 1 #define MAC_ISR_PMTIS_INDEX 4 #define MAC_ISR_PMTIS_WIDTH 1 +#define MAC_ISR_TSIS_INDEX 12 +#define MAC_ISR_TSIS_WIDTH 1 #define MAC_MACA1HR_AE_INDEX 31 #define MAC_MACA1HR_AE_WIDTH 1 #define MAC_PFR_HMC_INDEX 2 @@ -423,10 +437,48 @@ #define MAC_RFCR_RFE_WIDTH 1 #define MAC_RQC0R_RXQ0EN_INDEX 0 #define MAC_RQC0R_RXQ0EN_WIDTH 2 +#define MAC_SSIR_SNSINC_INDEX 8 +#define MAC_SSIR_SNSINC_WIDTH 8 +#define MAC_SSIR_SSINC_INDEX 16 +#define MAC_SSIR_SSINC_WIDTH 8 #define MAC_TCR_SS_INDEX 29 #define MAC_TCR_SS_WIDTH 2 #define MAC_TCR_TE_INDEX 0 #define MAC_TCR_TE_WIDTH 1 +#define MAC_TSCR_AV8021ASMEN_INDEX 28 +#define MAC_TSCR_AV8021ASMEN_WIDTH 1 +#define MAC_TSCR_SNAPTYPSEL_INDEX 16 +#define MAC_TSCR_SNAPTYPSEL_WIDTH 2 +#define MAC_TSCR_TSADDREG_INDEX 5 +#define MAC_TSCR_TSADDREG_WIDTH 1 +#define MAC_TSCR_TSCFUPDT_INDEX 1 +#define MAC_TSCR_TSCFUPDT_WIDTH 1 +#define MAC_TSCR_TSCTRLSSR_INDEX 9 +#define MAC_TSCR_TSCTRLSSR_WIDTH 1 +#define MAC_TSCR_TSENA_INDEX 0 +#define MAC_TSCR_TSENA_WIDTH 1 +#define MAC_TSCR_TSENALL_INDEX 8 +#define MAC_TSCR_TSENALL_WIDTH 1 +#define MAC_TSCR_TSEVNTENA_INDEX 14 +#define MAC_TSCR_TSEVNTENA_WIDTH 1 +#define MAC_TSCR_TSINIT_INDEX 2 +#define MAC_TSCR_TSINIT_WIDTH 1 +#define MAC_TSCR_TSIPENA_INDEX 11 +#define MAC_TSCR_TSIPENA_WIDTH 1 +#define MAC_TSCR_TSIPV4ENA_INDEX 13 +#define MAC_TSCR_TSIPV4ENA_WIDTH 1 +#define MAC_TSCR_TSIPV6ENA_INDEX 12 +#define MAC_TSCR_TSIPV6ENA_WIDTH 1 +#define MAC_TSCR_TSMSTRENA_INDEX 15 +#define MAC_TSCR_TSMSTRENA_WIDTH 1 +#define MAC_TSCR_TSVER2ENA_INDEX 10 +#define MAC_TSCR_TSVER2ENA_WIDTH 1 +#define MAC_TSCR_TXTSSTSM_INDEX 24 +#define MAC_TSCR_TXTSSTSM_WIDTH 1 +#define 
MAC_TSSR_TXTSC_INDEX 15 +#define MAC_TSSR_TXTSC_WIDTH 1 +#define MAC_TXSNR_TXTSSTSMIS_INDEX 31 +#define MAC_TXSNR_TXTSSTSMIS_WIDTH 1 #define MAC_VLANHTR_VLHT_INDEX 0 #define MAC_VLANHTR_VLHT_WIDTH 16 #define MAC_VLANIR_VLTI_INDEX 20 @@ -778,9 +830,19 @@ #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH 1 #define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX 2 #define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH 1 +#define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX 3 +#define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH 1 +#define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX 4 +#define RX_PACKET_ATTRIBUTES_CONTEXT_WIDTH 1 +#define RX_PACKET_ATTRIBUTES_RX_TSTAMP_INDEX 5 +#define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH 1 #define RX_NORMAL_DESC0_OVT_INDEX 0 #define RX_NORMAL_DESC0_OVT_WIDTH 16 +#define RX_NORMAL_DESC3_CDA_INDEX 27 +#define RX_NORMAL_DESC3_CDA_WIDTH 1 +#define RX_NORMAL_DESC3_CTXT_INDEX 30 +#define RX_NORMAL_DESC3_CTXT_WIDTH 1 #define RX_NORMAL_DESC3_ES_INDEX 15 #define RX_NORMAL_DESC3_ES_WIDTH 1 #define RX_NORMAL_DESC3_ETLT_INDEX 16 @@ -794,12 +856,19 @@ #define RX_NORMAL_DESC3_PL_INDEX 0 #define RX_NORMAL_DESC3_PL_WIDTH 14 +#define RX_CONTEXT_DESC3_TSA_INDEX 4 +#define RX_CONTEXT_DESC3_TSA_WIDTH 1 +#define RX_CONTEXT_DESC3_TSD_INDEX 6 +#define RX_CONTEXT_DESC3_TSD_WIDTH 1 + #define TX_PACKET_ATTRIBUTES_CSUM_ENABLE_INDEX 0 #define TX_PACKET_ATTRIBUTES_CSUM_ENABLE_WIDTH 1 #define TX_PACKET_ATTRIBUTES_TSO_ENABLE_INDEX 1 #define TX_PACKET_ATTRIBUTES_TSO_ENABLE_WIDTH 1 #define TX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX 2 #define TX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH 1 +#define TX_PACKET_ATTRIBUTES_PTP_INDEX 3 +#define TX_PACKET_ATTRIBUTES_PTP_WIDTH 1 #define TX_CONTEXT_DESC2_MSS_INDEX 0 #define TX_CONTEXT_DESC2_MSS_WIDTH 15 @@ -816,6 +885,8 @@ #define TX_NORMAL_DESC2_HL_B1L_WIDTH 14 #define TX_NORMAL_DESC2_IC_INDEX 31 #define TX_NORMAL_DESC2_IC_WIDTH 1 +#define TX_NORMAL_DESC2_TTSE_INDEX 30 +#define TX_NORMAL_DESC2_TTSE_WIDTH 1 #define TX_NORMAL_DESC2_VTIR_INDEX 14 #define TX_NORMAL_DESC2_VTIR_WIDTH 2 #define TX_NORMAL_DESC3_CIC_INDEX 16 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index a9ce56d5e988..1c5d62e8dab6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -359,6 +359,15 @@ static void xgbe_unmap_skb(struct xgbe_prv_data *pdata, rdata->len = 0; rdata->interrupt = 0; rdata->mapped_as_page = 0; + + if (rdata->state_saved) { + rdata->state_saved = 0; + rdata->state.incomplete = 0; + rdata->state.context_next = 0; + rdata->state.skb = NULL; + rdata->state.len = 0; + rdata->state.error = 0; + } } static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 699cff5d3184..098a7461db2d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -131,7 +131,7 @@ static unsigned int xgbe_usec_to_riwt(struct xgbe_prv_data *pdata, DBGPR("-->xgbe_usec_to_riwt\n"); - rate = clk_get_rate(pdata->sysclock); + rate = clk_get_rate(pdata->sysclk); /* * Convert the input usec value to the watchdog timer value. Each @@ -154,7 +154,7 @@ static unsigned int xgbe_riwt_to_usec(struct xgbe_prv_data *pdata, DBGPR("-->xgbe_riwt_to_usec\n"); - rate = clk_get_rate(pdata->sysclock); + rate = clk_get_rate(pdata->sysclk); /* * Convert the input watchdog timer value to the usec value. 
Each @@ -492,8 +492,12 @@ static void xgbe_enable_mtl_interrupts(struct xgbe_prv_data *pdata) static void xgbe_enable_mac_interrupts(struct xgbe_prv_data *pdata) { - /* No MAC interrupts to be enabled */ - XGMAC_IOWRITE(pdata, MAC_IER, 0); + unsigned int mac_ier = 0; + + /* Enable Timestamp interrupt */ + XGMAC_SET_BITS(mac_ier, MAC_IER, TSIE, 1); + + XGMAC_IOWRITE(pdata, MAC_IER, mac_ier); /* Enable all counter interrupts */ XGMAC_IOWRITE_BITS(pdata, MMC_RIER, ALL_INTERRUPTS, 0xff); @@ -1012,6 +1016,107 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel) DBGPR("<--rx_desc_init\n"); } +static void xgbe_update_tstamp_addend(struct xgbe_prv_data *pdata, + unsigned int addend) +{ + /* Set the addend register value and tell the device */ + XGMAC_IOWRITE(pdata, MAC_TSAR, addend); + XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSADDREG, 1); + + /* Wait for addend update to complete */ + while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG)) + udelay(5); +} + +static void xgbe_set_tstamp_time(struct xgbe_prv_data *pdata, unsigned int sec, + unsigned int nsec) +{ + /* Set the time values and tell the device */ + XGMAC_IOWRITE(pdata, MAC_STSUR, sec); + XGMAC_IOWRITE(pdata, MAC_STNUR, nsec); + XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSINIT, 1); + + /* Wait for time update to complete */ + while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT)) + udelay(5); +} + +static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata) +{ + u64 nsec; + + nsec = XGMAC_IOREAD(pdata, MAC_STSR); + nsec *= NSEC_PER_SEC; + nsec += XGMAC_IOREAD(pdata, MAC_STNR); + + return nsec; +} + +static u64 xgbe_get_tx_tstamp(struct xgbe_prv_data *pdata) +{ + unsigned int tx_snr; + u64 nsec; + + tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR); + if (XGMAC_GET_BITS(tx_snr, MAC_TXSNR, TXTSSTSMIS)) + return 0; + + nsec = XGMAC_IOREAD(pdata, MAC_TXSSR); + nsec *= NSEC_PER_SEC; + nsec += tx_snr; + + return nsec; +} + +static void xgbe_get_rx_tstamp(struct xgbe_packet_data *packet, + struct xgbe_ring_desc *rdesc) +{ + u64 nsec; + + if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_CONTEXT_DESC3, TSA) && + !XGMAC_GET_BITS_LE(rdesc->desc3, RX_CONTEXT_DESC3, TSD)) { + nsec = le32_to_cpu(rdesc->desc1); + nsec <<= 32; + nsec |= le32_to_cpu(rdesc->desc0); + if (nsec != 0xffffffffffffffffULL) { + packet->rx_tstamp = nsec; + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + RX_TSTAMP, 1); + } + } +} + +static int xgbe_config_tstamp(struct xgbe_prv_data *pdata, + unsigned int mac_tscr) +{ + /* Set one nano-second accuracy */ + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSCTRLSSR, 1); + + /* Set fine timestamp update */ + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSCFUPDT, 1); + + /* Overwrite earlier timestamps */ + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TXTSSTSM, 1); + + XGMAC_IOWRITE(pdata, MAC_TSCR, mac_tscr); + + /* Exit if timestamping is not enabled */ + if (!XGMAC_GET_BITS(mac_tscr, MAC_TSCR, TSENA)) + return 0; + + /* Initialize time registers */ + XGMAC_IOWRITE_BITS(pdata, MAC_SSIR, SSINC, XGBE_TSTAMP_SSINC); + XGMAC_IOWRITE_BITS(pdata, MAC_SSIR, SNSINC, XGBE_TSTAMP_SNSINC); + xgbe_update_tstamp_addend(pdata, pdata->tstamp_addend); + xgbe_set_tstamp_time(pdata, 0, 0); + + /* Initialize the timecounter */ + timecounter_init(&pdata->tstamp_tc, &pdata->tstamp_cc, + ktime_to_ns(ktime_get_real())); + + return 0; +} + static void xgbe_pre_xmit(struct xgbe_channel *channel) { struct xgbe_prv_data *pdata = channel->pdata; @@ -1110,6 +1215,10 @@ static void xgbe_pre_xmit(struct xgbe_channel *channel) XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, VTIR, TX_NORMAL_DESC2_VLAN_INSERT); 
+ /* Timestamp enablement check */ + if (XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP)) + XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, TTSE, 1); + /* Set IC bit based on Tx coalescing settings */ XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 1); if (tx_coalesce && (!tx_frames || @@ -1245,6 +1354,25 @@ static int xgbe_dev_read(struct xgbe_channel *channel) xgbe_dump_rx_desc(ring, rdesc, ring->cur); #endif + if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, CTXT)) { + /* Timestamp Context Descriptor */ + xgbe_get_rx_tstamp(packet, rdesc); + + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + CONTEXT, 1); + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + CONTEXT_NEXT, 0); + return 0; + } + + /* Normal Descriptor, be sure Context Descriptor bit is off */ + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT, 0); + + /* Indicate if a Context Descriptor is next */ + if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, CDA)) + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + CONTEXT_NEXT, 1); + /* Get the packet length */ rdata->len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL); @@ -2313,5 +2441,12 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->rx_mmc_int = xgbe_rx_mmc_int; hw_if->read_mmc_stats = xgbe_read_mmc_stats; + /* For PTP config */ + hw_if->config_tstamp = xgbe_config_tstamp; + hw_if->update_tstamp_addend = xgbe_update_tstamp_addend; + hw_if->set_tstamp_time = xgbe_set_tstamp_time; + hw_if->get_tstamp_time = xgbe_get_tstamp_time; + hw_if->get_tx_tstamp = xgbe_get_tx_tstamp; + DBGPR("<--xgbe_init_function_ptrs\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 344e6b19ec0e..47a1e25b5609 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -121,6 +121,7 @@ #include #include #include +#include #include "xgbe.h" #include "xgbe-common.h" @@ -202,7 +203,7 @@ static irqreturn_t xgbe_isr(int irq, void *data) struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; unsigned int dma_isr, dma_ch_isr; - unsigned int mac_isr; + unsigned int mac_isr, mac_tssr; unsigned int i; /* The DMA interrupt status register also reports MAC and MTL @@ -255,6 +256,17 @@ static irqreturn_t xgbe_isr(int irq, void *data) if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCRXIS)) hw_if->rx_mmc_int(pdata); + + if (XGMAC_GET_BITS(mac_isr, MAC_ISR, TSIS)) { + mac_tssr = XGMAC_IOREAD(pdata, MAC_TSSR); + + if (XGMAC_GET_BITS(mac_tssr, MAC_TSSR, TXTSC)) { + /* Read Tx Timestamp to clear interrupt */ + pdata->tx_tstamp = + hw_if->get_tx_tstamp(pdata); + schedule_work(&pdata->tx_tstamp_work); + } + } } DBGPR(" DMA_ISR = %08x\n", XGMAC_IOREAD(pdata, DMA_ISR)); @@ -668,6 +680,197 @@ static void xgbe_restart(struct work_struct *work) rtnl_unlock(); } +static void xgbe_tx_tstamp(struct work_struct *work) +{ + struct xgbe_prv_data *pdata = container_of(work, + struct xgbe_prv_data, + tx_tstamp_work); + struct skb_shared_hwtstamps hwtstamps; + u64 nsec; + unsigned long flags; + + if (pdata->tx_tstamp) { + nsec = timecounter_cyc2time(&pdata->tstamp_tc, + pdata->tx_tstamp); + + memset(&hwtstamps, 0, sizeof(hwtstamps)); + hwtstamps.hwtstamp = ns_to_ktime(nsec); + skb_tstamp_tx(pdata->tx_tstamp_skb, &hwtstamps); + } + + dev_kfree_skb_any(pdata->tx_tstamp_skb); + + spin_lock_irqsave(&pdata->tstamp_lock, flags); + pdata->tx_tstamp_skb = NULL; + spin_unlock_irqrestore(&pdata->tstamp_lock, flags); +} + +static int 
xgbe_get_hwtstamp_settings(struct xgbe_prv_data *pdata, + struct ifreq *ifreq) +{ + if (copy_to_user(ifreq->ifr_data, &pdata->tstamp_config, + sizeof(pdata->tstamp_config))) + return -EFAULT; + + return 0; +} + +static int xgbe_set_hwtstamp_settings(struct xgbe_prv_data *pdata, + struct ifreq *ifreq) +{ + struct hwtstamp_config config; + unsigned int mac_tscr; + + if (copy_from_user(&config, ifreq->ifr_data, sizeof(config))) + return -EFAULT; + + if (config.flags) + return -EINVAL; + + mac_tscr = 0; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + break; + + case HWTSTAMP_TX_ON: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + + case HWTSTAMP_FILTER_ALL: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* PTP v2, UDP, any kind of event packet */ + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1); + /* PTP v1, UDP, any kind of event packet */ + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, SNAPTYPSEL, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* PTP v2, UDP, Sync packet */ + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1); + /* PTP v1, UDP, Sync packet */ + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSEVNTENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* PTP v2, UDP, Delay_req packet */ + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1); + /* PTP v1, UDP, Delay_req packet */ + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSEVNTENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSMSTRENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* 802.AS1, Ethernet, any kind of event packet */ + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, AV8021ASMEN, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, SNAPTYPSEL, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* 802.AS1, Ethernet, Sync packet */ + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, AV8021ASMEN, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSEVNTENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* 802.AS1, Ethernet, Delay_req packet */ + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, AV8021ASMEN, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSMSTRENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSEVNTENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* PTP v2/802.AS1, any layer, any kind of event packet */ + case HWTSTAMP_FILTER_PTP_V2_EVENT: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, SNAPTYPSEL, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* PTP v2/802.AS1, any layer, Sync packet */ + case HWTSTAMP_FILTER_PTP_V2_SYNC: + 
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSEVNTENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + /* PTP v2/802.AS1, any layer, Delay_req packet */ + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSMSTRENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSEVNTENA, 1); + XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1); + break; + + default: + return -ERANGE; + } + + pdata->hw_if.config_tstamp(pdata, mac_tscr); + + memcpy(&pdata->tstamp_config, &config, sizeof(config)); + + return 0; +} + +static void xgbe_prep_tx_tstamp(struct xgbe_prv_data *pdata, + struct sk_buff *skb, + struct xgbe_packet_data *packet) +{ + unsigned long flags; + + if (XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP)) { + spin_lock_irqsave(&pdata->tstamp_lock, flags); + if (pdata->tx_tstamp_skb) { + /* Another timestamp in progress, ignore this one */ + XGMAC_SET_BITS(packet->attributes, + TX_PACKET_ATTRIBUTES, PTP, 0); + } else { + pdata->tx_tstamp_skb = skb_get(skb); + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + } + spin_unlock_irqrestore(&pdata->tstamp_lock, flags); + } + + if (!XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP)) + skb_tx_timestamp(skb); +} + static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data *packet) { if (vlan_tx_tag_present(skb)) @@ -711,7 +914,8 @@ static int xgbe_is_tso(struct sk_buff *skb) return 1; } -static void xgbe_packet_info(struct xgbe_ring *ring, struct sk_buff *skb, +static void xgbe_packet_info(struct xgbe_prv_data *pdata, + struct xgbe_ring *ring, struct sk_buff *skb, struct xgbe_packet_data *packet) { struct skb_frag_struct *frag; @@ -753,6 +957,11 @@ static void xgbe_packet_info(struct xgbe_ring *ring, struct sk_buff *skb, VLAN_CTAG, 1); } + if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + (pdata->tstamp_config.tx_type == HWTSTAMP_TX_ON)) + XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, + PTP, 1); + for (len = skb_headlen(skb); len;) { packet->rdesc_count++; len -= min_t(unsigned int, len, XGBE_TX_MAX_BUF_SIZE); @@ -776,26 +985,33 @@ static int xgbe_open(struct net_device *netdev) DBGPR("-->xgbe_open\n"); - /* Enable the clock */ - ret = clk_prepare_enable(pdata->sysclock); + /* Enable the clocks */ + ret = clk_prepare_enable(pdata->sysclk); if (ret) { - netdev_alert(netdev, "clk_prepare_enable failed\n"); + netdev_alert(netdev, "dma clk_prepare_enable failed\n"); return ret; } + ret = clk_prepare_enable(pdata->ptpclk); + if (ret) { + netdev_alert(netdev, "ptp clk_prepare_enable failed\n"); + goto err_sysclk; + } + /* Calculate the Rx buffer size before allocating rings */ ret = xgbe_calc_rx_buf_size(netdev, netdev->mtu); if (ret < 0) - goto err_clk; + goto err_ptpclk; pdata->rx_buf_size = ret; /* Allocate the ring descriptors and buffers */ ret = desc_if->alloc_ring_resources(pdata); if (ret) - goto err_clk; + goto err_ptpclk; - /* Initialize the device restart work struct */ + /* Initialize the device restart and Tx timestamp work struct */ INIT_WORK(&pdata->restart_work, xgbe_restart); + INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp); /* Request interrupts */ ret = 
devm_request_irq(pdata->dev, netdev->irq, xgbe_isr, 0, @@ -824,8 +1040,11 @@ err_start: err_irq: desc_if->free_ring_resources(pdata); -err_clk: - clk_disable_unprepare(pdata->sysclock); +err_ptpclk: + clk_disable_unprepare(pdata->ptpclk); + +err_sysclk: + clk_disable_unprepare(pdata->sysclk); return ret; } @@ -853,8 +1072,9 @@ static int xgbe_close(struct net_device *netdev) pdata->irq_number = 0; } - /* Disable the clock */ - clk_disable_unprepare(pdata->sysclock); + /* Disable the clocks */ + clk_disable_unprepare(pdata->ptpclk); + clk_disable_unprepare(pdata->sysclk); DBGPR("<--xgbe_close\n"); @@ -890,7 +1110,7 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) /* Calculate preliminary packet info */ memset(packet, 0, sizeof(*packet)); - xgbe_packet_info(ring, skb, packet); + xgbe_packet_info(pdata, ring, skb, packet); /* Check that there are enough descriptors available */ if (packet->rdesc_count > xgbe_tx_avail_desc(ring)) { @@ -914,6 +1134,8 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) goto tx_netdev_return; } + xgbe_prep_tx_tstamp(pdata, skb, packet); + /* Configure required descriptor fields for transmission */ hw_if->pre_xmit(channel); @@ -968,6 +1190,27 @@ static int xgbe_set_mac_address(struct net_device *netdev, void *addr) return 0; } +static int xgbe_ioctl(struct net_device *netdev, struct ifreq *ifreq, int cmd) +{ + struct xgbe_prv_data *pdata = netdev_priv(netdev); + int ret; + + switch (cmd) { + case SIOCGHWTSTAMP: + ret = xgbe_get_hwtstamp_settings(pdata, ifreq); + break; + + case SIOCSHWTSTAMP: + ret = xgbe_set_hwtstamp_settings(pdata, ifreq); + break; + + default: + ret = -EOPNOTSUPP; + } + + return ret; +} + static int xgbe_change_mtu(struct net_device *netdev, int mtu) { struct xgbe_prv_data *pdata = netdev_priv(netdev); @@ -1109,6 +1352,7 @@ static const struct net_device_ops xgbe_netdev_ops = { .ndo_set_rx_mode = xgbe_set_rx_mode, .ndo_set_mac_address = xgbe_set_mac_address, .ndo_validate_addr = eth_validate_addr, + .ndo_do_ioctl = xgbe_ioctl, .ndo_change_mtu = xgbe_change_mtu, .ndo_get_stats64 = xgbe_get_stats64, .ndo_vlan_rx_add_vid = xgbe_vlan_rx_add_vid, @@ -1202,8 +1446,9 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) struct xgbe_packet_data *packet; struct net_device *netdev = pdata->netdev; struct sk_buff *skb; - unsigned int incomplete, error; - unsigned int cur_len, put_len, max_len; + struct skb_shared_hwtstamps *hwtstamps; + unsigned int incomplete, error, context_next, context; + unsigned int len, put_len, max_len; int received = 0; DBGPR("-->xgbe_rx_poll: budget=%d\n", budget); @@ -1212,22 +1457,33 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) if (!ring) return 0; + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); packet = &ring->packet_data; while (received < budget) { DBGPR(" cur = %d\n", ring->cur); - /* Clear the packet data information */ - memset(packet, 0, sizeof(*packet)); - skb = NULL; - error = 0; - cur_len = 0; + /* First time in loop see if we need to restore state */ + if (!received && rdata->state_saved) { + incomplete = rdata->state.incomplete; + context_next = rdata->state.context_next; + skb = rdata->state.skb; + error = rdata->state.error; + len = rdata->state.len; + } else { + memset(packet, 0, sizeof(*packet)); + incomplete = 0; + context_next = 0; + skb = NULL; + error = 0; + len = 0; + } read_again: + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); + if (ring->dirty > (XGBE_RX_DESC_CNT >> 3)) xgbe_rx_refresh(channel); - rdata = 
XGBE_GET_DESC_DATA(ring, ring->cur); - if (hw_if->dev_read(channel)) break; @@ -1242,9 +1498,15 @@ read_again: incomplete = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, INCOMPLETE); + context_next = XGMAC_GET_BITS(packet->attributes, + RX_PACKET_ATTRIBUTES, + CONTEXT_NEXT); + context = XGMAC_GET_BITS(packet->attributes, + RX_PACKET_ATTRIBUTES, + CONTEXT); /* Earlier error, just drain the remaining data */ - if (incomplete && error) + if ((incomplete || context_next) && error) goto read_again; if (error || packet->errors) { @@ -1254,30 +1516,37 @@ read_again: continue; } - put_len = rdata->len - cur_len; - if (skb) { - if (pskb_expand_head(skb, 0, put_len, GFP_ATOMIC)) { - DBGPR("pskb_expand_head error\n"); - if (incomplete) { - error = 1; - goto read_again; + if (!context) { + put_len = rdata->len - len; + if (skb) { + if (pskb_expand_head(skb, 0, put_len, + GFP_ATOMIC)) { + DBGPR("pskb_expand_head error\n"); + if (incomplete) { + error = 1; + goto read_again; + } + + dev_kfree_skb(skb); + continue; } - - dev_kfree_skb(skb); - continue; + memcpy(skb_tail_pointer(skb), rdata->skb->data, + put_len); + } else { + skb = rdata->skb; + rdata->skb = NULL; } - memcpy(skb_tail_pointer(skb), rdata->skb->data, - put_len); - } else { - skb = rdata->skb; - rdata->skb = NULL; + skb_put(skb, put_len); + len += put_len; } - skb_put(skb, put_len); - cur_len += put_len; - if (incomplete) + if (incomplete || context_next) goto read_again; + /* Stray Context Descriptor? */ + if (!skb) + continue; + /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; if (!(netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && @@ -1304,6 +1573,16 @@ read_again: __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), packet->vlan_ctag); + if (XGMAC_GET_BITS(packet->attributes, + RX_PACKET_ATTRIBUTES, RX_TSTAMP)) { + u64 nsec; + + nsec = timecounter_cyc2time(&pdata->tstamp_tc, + packet->rx_tstamp); + hwtstamps = skb_hwtstamps(skb); + hwtstamps->hwtstamp = ns_to_ktime(nsec); + } + skb->dev = netdev; skb->protocol = eth_type_trans(skb, netdev); skb_record_rx_queue(skb, channel->queue_index); @@ -1313,6 +1592,17 @@ read_again: napi_gro_receive(&pdata->napi, skb); } + /* Check if we need to save state before leaving */ + if (received && (incomplete || context_next)) { + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); + rdata->state_saved = 1; + rdata->state.incomplete = incomplete; + rdata->state.context_next = context_next; + rdata->state.skb = skb; + rdata->state.len = len; + rdata->state.error = error; + } + DBGPR("<--xgbe_rx_poll: received = %d\n", received); return received; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index f7405261f23e..4961666fa55f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -116,6 +116,7 @@ #include #include +#include #include "xgbe.h" #include "xgbe-common.h" @@ -480,6 +481,39 @@ static int xgbe_set_coalesce(struct net_device *netdev, return 0; } +static int xgbe_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *ts_info) +{ + struct xgbe_prv_data *pdata = netdev_priv(netdev); + + ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + if (pdata->ptp_clock) + ts_info->phc_index = ptp_clock_index(pdata->ptp_clock); + else + ts_info->phc_index = -1; + + ts_info->tx_types = (1 << 
HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); + ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_ALL); + + return 0; +} + static const struct ethtool_ops xgbe_ethtool_ops = { .get_settings = xgbe_get_settings, .set_settings = xgbe_set_settings, @@ -492,6 +526,7 @@ static const struct ethtool_ops xgbe_ethtool_ops = { .get_strings = xgbe_get_strings, .get_ethtool_stats = xgbe_get_ethtool_stats, .get_sset_count = xgbe_get_sset_count, + .get_ts_info = xgbe_get_ts_info, }; struct ethtool_ops *xgbe_get_ethtool_ops(void) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index e56c3d45b30e..ca6a6af00f9f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -245,6 +245,7 @@ static int xgbe_probe(struct platform_device *pdev) spin_lock_init(&pdata->lock); mutex_init(&pdata->xpcs_mutex); + spin_lock_init(&pdata->tstamp_lock); /* Set and validate the number of descriptors for a ring */ BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT); @@ -265,10 +266,18 @@ static int xgbe_probe(struct platform_device *pdev) } /* Obtain the system clock setting */ - pdata->sysclock = devm_clk_get(dev, NULL); - if (IS_ERR(pdata->sysclock)) { - dev_err(dev, "devm_clk_get failed\n"); - ret = PTR_ERR(pdata->sysclock); + pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK); + if (IS_ERR(pdata->sysclk)) { + dev_err(dev, "dma devm_clk_get failed\n"); + ret = PTR_ERR(pdata->sysclk); + goto err_io; + } + + /* Obtain the PTP clock setting */ + pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK); + if (IS_ERR(pdata->ptpclk)) { + dev_err(dev, "ptp devm_clk_get failed\n"); + ret = PTR_ERR(pdata->ptpclk); goto err_io; } @@ -420,6 +429,8 @@ static int xgbe_probe(struct platform_device *pdev) goto err_reg_netdev; } + xgbe_ptp_register(pdata); + xgbe_debugfs_init(pdata); netdev_notice(netdev, "net device enabled\n"); @@ -452,6 +463,8 @@ static int xgbe_remove(struct platform_device *pdev) xgbe_debugfs_exit(pdata); + xgbe_ptp_unregister(pdata); + unregister_netdev(netdev); xgbe_mdio_unregister(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c new file mode 100644 index 000000000000..37e64cfa5718 --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c @@ -0,0 +1,285 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "xgbe.h" +#include "xgbe-common.h" + + +static cycle_t xgbe_cc_read(const struct cyclecounter *cc) +{ + struct xgbe_prv_data *pdata = container_of(cc, + struct xgbe_prv_data, + tstamp_cc); + u64 nsec; + + nsec = pdata->hw_if.get_tstamp_time(pdata); + + return nsec; +} + +static int xgbe_adjfreq(struct ptp_clock_info *info, s32 delta) +{ + struct xgbe_prv_data *pdata = container_of(info, + struct xgbe_prv_data, + ptp_clock_info); + unsigned long flags; + u64 adjust; + u32 addend, diff; + unsigned int neg_adjust = 0; + + if (delta < 0) { + neg_adjust = 1; + delta = -delta; + } + + adjust = pdata->tstamp_addend; + adjust *= delta; + diff = div_u64(adjust, 1000000000UL); + + addend = (neg_adjust) ? 
pdata->tstamp_addend - diff : + pdata->tstamp_addend + diff; + + spin_lock_irqsave(&pdata->tstamp_lock, flags); + + pdata->hw_if.update_tstamp_addend(pdata, addend); + + spin_unlock_irqrestore(&pdata->tstamp_lock, flags); + + return 0; +} + +static int xgbe_adjtime(struct ptp_clock_info *info, s64 delta) +{ + struct xgbe_prv_data *pdata = container_of(info, + struct xgbe_prv_data, + ptp_clock_info); + unsigned long flags; + u64 nsec; + + spin_lock_irqsave(&pdata->tstamp_lock, flags); + + nsec = timecounter_read(&pdata->tstamp_tc); + + nsec += delta; + timecounter_init(&pdata->tstamp_tc, &pdata->tstamp_cc, nsec); + + spin_unlock_irqrestore(&pdata->tstamp_lock, flags); + + return 0; +} + +static int xgbe_gettime(struct ptp_clock_info *info, struct timespec *ts) +{ + struct xgbe_prv_data *pdata = container_of(info, + struct xgbe_prv_data, + ptp_clock_info); + unsigned long flags; + u64 nsec; + + spin_lock_irqsave(&pdata->tstamp_lock, flags); + + nsec = timecounter_read(&pdata->tstamp_tc); + + spin_unlock_irqrestore(&pdata->tstamp_lock, flags); + + *ts = ns_to_timespec(nsec); + + return 0; +} + +static int xgbe_settime(struct ptp_clock_info *info, const struct timespec *ts) +{ + struct xgbe_prv_data *pdata = container_of(info, + struct xgbe_prv_data, + ptp_clock_info); + unsigned long flags; + u64 nsec; + + nsec = timespec_to_ns(ts); + + spin_lock_irqsave(&pdata->tstamp_lock, flags); + + timecounter_init(&pdata->tstamp_tc, &pdata->tstamp_cc, nsec); + + spin_unlock_irqrestore(&pdata->tstamp_lock, flags); + + return 0; +} + +static int xgbe_enable(struct ptp_clock_info *info, + struct ptp_clock_request *request, int on) +{ + return -EOPNOTSUPP; +} + +void xgbe_ptp_register(struct xgbe_prv_data *pdata) +{ + struct ptp_clock_info *info = &pdata->ptp_clock_info; + struct ptp_clock *clock; + struct cyclecounter *cc = &pdata->tstamp_cc; + u64 dividend; + + snprintf(info->name, sizeof(info->name), "%s", + netdev_name(pdata->netdev)); + info->owner = THIS_MODULE; + info->max_adj = clk_get_rate(pdata->ptpclk); + info->adjfreq = xgbe_adjfreq; + info->adjtime = xgbe_adjtime; + info->gettime = xgbe_gettime; + info->settime = xgbe_settime; + info->enable = xgbe_enable; + + clock = ptp_clock_register(info, pdata->dev); + if (IS_ERR(clock)) { + dev_err(pdata->dev, "ptp_clock_register failed\n"); + return; + } + + pdata->ptp_clock = clock; + + /* Calculate the addend: + * addend = 2^32 / (PTP ref clock / 50Mhz) + * = (2^32 * 50Mhz) / PTP ref clock + */ + dividend = 50000000; + dividend <<= 32; + pdata->tstamp_addend = div_u64(dividend, clk_get_rate(pdata->ptpclk)); + + /* Setup the timecounter */ + cc->read = xgbe_cc_read; + cc->mask = CLOCKSOURCE_MASK(64); + cc->mult = 1; + cc->shift = 0; + + timecounter_init(&pdata->tstamp_tc, &pdata->tstamp_cc, + ktime_to_ns(ktime_get_real())); + + /* Disable all timestamping to start */ + XGMAC_IOWRITE(pdata, MAC_TCR, 0); + pdata->tstamp_config.tx_type = HWTSTAMP_TX_OFF; + pdata->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; +} + +void xgbe_ptp_unregister(struct xgbe_prv_data *pdata) +{ + if (pdata->ptp_clock) + ptp_clock_unregister(pdata->ptp_clock); +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 9e24b296e272..8b6ad3e82c34 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -123,6 +123,9 @@ #include #include #include +#include +#include +#include #define XGBE_DRV_NAME "amd-xgbe" @@ -164,6 +167,16 @@ #define XGBE_PHY_NAME "amd_xgbe_phy" #define XGBE_PRTAD 0 +/* Device-tree 
clock names */ +#define XGBE_DMA_CLOCK "dma_clk" +#define XGBE_PTP_CLOCK "ptp_clk" + +/* Timestamp support - values based on 50MHz PTP clock + * 50MHz => 20 nsec + */ +#define XGBE_TSTAMP_SSINC 20 +#define XGBE_TSTAMP_SNSINC 0 + /* Driver PMT macros */ #define XGMAC_DRIVER_CONTEXT 1 #define XGMAC_IOCTL_CONTEXT 2 @@ -214,6 +227,8 @@ struct xgbe_packet_data { unsigned short mss; unsigned short vlan_ctag; + + u64 rx_tstamp; }; /* Common Rx and Tx descriptor mapping */ @@ -242,6 +257,20 @@ struct xgbe_ring_data { unsigned int interrupt; /* Interrupt indicator */ unsigned int mapped_as_page; + + /* Incomplete receive save location. If the budget is exhausted + * or the last descriptor (last normal descriptor or a following + * context descriptor) has not been DMA'd yet the current state + * of the receive processing needs to be saved. + */ + unsigned int state_saved; + struct { + unsigned int incomplete; + unsigned int context_next; + struct sk_buff *skb; + unsigned int len; + unsigned int error; + } state; }; struct xgbe_ring { @@ -467,6 +496,14 @@ struct xgbe_hw_if { void (*rx_mmc_int)(struct xgbe_prv_data *); void (*tx_mmc_int)(struct xgbe_prv_data *); void (*read_mmc_stats)(struct xgbe_prv_data *); + + /* For Timestamp config */ + int (*config_tstamp)(struct xgbe_prv_data *, unsigned int); + void (*update_tstamp_addend)(struct xgbe_prv_data *, unsigned int); + void (*set_tstamp_time)(struct xgbe_prv_data *, unsigned int sec, + unsigned int nsec); + u64 (*get_tstamp_time)(struct xgbe_prv_data *); + u64 (*get_tx_tstamp)(struct xgbe_prv_data *); }; struct xgbe_desc_if { @@ -607,8 +644,21 @@ struct xgbe_prv_data { /* Filtering support */ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - /* System clock value used for Rx watchdog */ - struct clk *sysclock; + /* Device clocks */ + struct clk *sysclk; + struct clk *ptpclk; + + /* Timestamp support */ + spinlock_t tstamp_lock; + struct ptp_clock_info ptp_clock_info; + struct ptp_clock *ptp_clock; + struct hwtstamp_config tstamp_config; + struct cyclecounter tstamp_cc; + struct timecounter tstamp_tc; + unsigned int tstamp_addend; + struct work_struct tx_tstamp_work; + struct sk_buff *tx_tstamp_skb; + u64 tx_tstamp; /* Hardware features of the device */ struct xgbe_hw_features hw_feat; @@ -639,6 +689,8 @@ struct ethtool_ops *xgbe_get_ethtool_ops(void); int xgbe_mdio_register(struct xgbe_prv_data *); void xgbe_mdio_unregister(struct xgbe_prv_data *); void xgbe_dump_phy_registers(struct xgbe_prv_data *); +void xgbe_ptp_register(struct xgbe_prv_data *); +void xgbe_ptp_unregister(struct xgbe_prv_data *); void xgbe_dump_tx_desc(struct xgbe_ring *, unsigned int, unsigned int, unsigned int); void xgbe_dump_rx_desc(struct xgbe_ring *, struct xgbe_ring_desc *, -- cgit v1.2.3 From f047604a3ff1a1d7c8bd4a43c72de3936d71f3c1 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 29 Jul 2014 08:57:25 -0500 Subject: amd-xgbe: Update/fix 2.5GbE support Update the amd-xgbe driver and phylib driver to better support the 2.5GbE mode for the hardware. In order to be able establish 2.5GbE using clause 73 auto negotiation the device will support speed sets of 1GbE/10GbE and 2.5GbE/10GbE. Signed-off-by: Tom Lendacky Signed-off-by: David S. 
Miller --- .../devicetree/bindings/net/amd-xgbe-phy.txt | 6 ++ drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 17 +++- drivers/net/phy/amd-xgbe-phy.c | 92 ++++++++++++++++++---- 3 files changed, 96 insertions(+), 19 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt index d01ed63d3ebb..42409bfe04c4 100644 --- a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt +++ b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt @@ -8,10 +8,16 @@ Required properties: - SerDes integration registers (1/2) - SerDes integration registers (2/2) +Optional properties: +- amd,speed-set: Speed capabilities of the device + 0 - 1GbE and 10GbE (default) + 1 - 2.5GbE and 10GbE + Example: xgbe_phy@e1240800 { compatible = "amd,xgbe-phy-seattle-v1a", "ethernet-phy-ieee802.3-c45"; reg = <0 0xe1240800 0 0x00400>, <0 0xe1250000 0 0x00060>, <0 0xe1250080 0 0x00004>; + amd,speed-set = <0>; }; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 4961666fa55f..6005b6021f78 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -327,10 +327,19 @@ static int xgbe_set_settings(struct net_device *netdev, (cmd->autoneg != AUTONEG_DISABLE)) goto unlock; - if ((cmd->autoneg == AUTONEG_DISABLE) && - (((speed != SPEED_10000) && (speed != SPEED_1000)) || - (cmd->duplex != DUPLEX_FULL))) - goto unlock; + if (cmd->autoneg == AUTONEG_DISABLE) { + switch (speed) { + case SPEED_10000: + case SPEED_2500: + case SPEED_1000: + break; + default: + goto unlock; + } + + if (cmd->duplex != DUPLEX_FULL) + goto unlock; + } cmd->advertising &= phydev->supported; if ((cmd->autoneg == AUTONEG_ENABLE) && !cmd->advertising) diff --git a/drivers/net/phy/amd-xgbe-phy.c b/drivers/net/phy/amd-xgbe-phy.c index b57c22442867..b35293da9f87 100644 --- a/drivers/net/phy/amd-xgbe-phy.c +++ b/drivers/net/phy/amd-xgbe-phy.c @@ -74,7 +74,6 @@ #include #include #include -#include MODULE_AUTHOR("Tom Lendacky "); @@ -85,6 +84,8 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define XGBE_PHY_ID 0x000162d0 #define XGBE_PHY_MASK 0xfffffff0 +#define XGBE_PHY_SPEEDSET_PROPERTY "amd,speed-set" + #define XGBE_AN_INT_CMPLT 0x01 #define XGBE_AN_INC_LINK 0x02 #define XGBE_AN_PG_RCV 0x04 @@ -145,7 +146,7 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define SPEED_2500_CDR 0x2 #define SPEED_2500_PLL 0x0 -#define SPEED_2500_RATE 0x2 +#define SPEED_2500_RATE 0x1 #define SPEED_2500_TXAMP 0xf #define SPEED_2500_WORD 0x1 @@ -292,6 +293,11 @@ enum amd_xgbe_phy_mode { AMD_XGBE_MODE_KX, }; +enum amd_xgbe_phy_speedset { + AMD_XGBE_PHY_SPEEDSET_1000_10000, + AMD_XGBE_PHY_SPEEDSET_2500_10000, +}; + struct amd_xgbe_phy_priv { struct platform_device *pdev; struct device *dev; @@ -311,6 +317,7 @@ struct amd_xgbe_phy_priv { /* Maintain link status for re-starting auto-negotiation */ unsigned int link; enum amd_xgbe_phy_mode mode; + unsigned int speed_set; /* Auto-negotiation state machine support */ struct mutex an_mutex; @@ -546,10 +553,14 @@ static int amd_xgbe_phy_switch_mode(struct phy_device *phydev) int ret; /* If we are in KR switch to KX, and vice-versa */ - if (priv->mode == AMD_XGBE_MODE_KR) - ret = amd_xgbe_phy_gmii_mode(phydev); - else + if (priv->mode == AMD_XGBE_MODE_KR) { + if (priv->speed_set == AMD_XGBE_PHY_SPEEDSET_1000_10000) + ret = amd_xgbe_phy_gmii_mode(phydev); + else + ret = amd_xgbe_phy_gmii_2500_mode(phydev); + } 
else { ret = amd_xgbe_phy_xgmii_mode(phydev); + } return ret; } @@ -713,7 +724,8 @@ static enum amd_xgbe_phy_an amd_xgbe_an_start(struct phy_device *phydev) else ret &= ~0x80; - if (phydev->supported & SUPPORTED_1000baseKX_Full) + if ((phydev->supported & SUPPORTED_1000baseKX_Full) || + (phydev->supported & SUPPORTED_2500baseX_Full)) ret |= 0x20; else ret &= ~0x20; @@ -896,14 +908,22 @@ static int amd_xgbe_phy_soft_reset(struct phy_device *phydev) static int amd_xgbe_phy_config_init(struct phy_device *phydev) { + struct amd_xgbe_phy_priv *priv = phydev->priv; + /* Initialize supported features */ phydev->supported = SUPPORTED_Autoneg; phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; phydev->supported |= SUPPORTED_Backplane; - phydev->supported |= SUPPORTED_1000baseKX_Full | - SUPPORTED_2500baseX_Full; phydev->supported |= SUPPORTED_10000baseKR_Full | SUPPORTED_10000baseR_FEC; + switch (priv->speed_set) { + case AMD_XGBE_PHY_SPEEDSET_1000_10000: + phydev->supported |= SUPPORTED_1000baseKX_Full; + break; + case AMD_XGBE_PHY_SPEEDSET_2500_10000: + phydev->supported |= SUPPORTED_2500baseX_Full; + break; + } phydev->advertising = phydev->supported; /* Turn off and clear interrupts */ @@ -1020,9 +1040,9 @@ static int amd_xgbe_phy_update_link(struct phy_device *phydev) * (re-)established (cable connected after the interface is * up, etc.), the link status may report no link. If there * is no link, try switching modes and checking the status - * again. + * again if auto negotiation is enabled. */ - check_again = 1; + check_again = (phydev->autoneg == AUTONEG_ENABLE) ? 1 : 0; again: /* Link status is latched low, so read once to clear * and then read again to get current state @@ -1038,8 +1058,10 @@ again: phydev->link = (ret & MDIO_STAT1_LSTATUS) ? 1 : 0; if (!phydev->link) { - ret = amd_xgbe_phy_switch_mode(phydev); if (check_again) { + ret = amd_xgbe_phy_switch_mode(phydev); + if (ret < 0) + return ret; check_again = 0; goto again; } @@ -1059,6 +1081,7 @@ again: static int amd_xgbe_phy_read_status(struct phy_device *phydev) { + struct amd_xgbe_phy_priv *priv = phydev->priv; u32 mmd_mask = phydev->c45_ids.devices_in_package; int ret, mode, ad_ret, lp_ret; @@ -1108,9 +1131,19 @@ static int amd_xgbe_phy_read_status(struct phy_device *phydev) return ret; } } else { - phydev->speed = SPEED_1000; + int (*mode_fcn)(struct phy_device *); + + if (priv->speed_set == + AMD_XGBE_PHY_SPEEDSET_1000_10000) { + phydev->speed = SPEED_1000; + mode_fcn = amd_xgbe_phy_gmii_mode; + } else { + phydev->speed = SPEED_2500; + mode_fcn = amd_xgbe_phy_gmii_2500_mode; + } + if (mode == MDIO_PCS_CTRL2_10GBR) { - ret = amd_xgbe_phy_gmii_mode(phydev); + ret = mode_fcn(phydev); if (ret < 0) return ret; } @@ -1118,8 +1151,15 @@ static int amd_xgbe_phy_read_status(struct phy_device *phydev) phydev->duplex = DUPLEX_FULL; } else { - phydev->speed = (mode == MDIO_PCS_CTRL2_10GBR) ? 
SPEED_10000 - : SPEED_1000; + if (mode == MDIO_PCS_CTRL2_10GBR) { + phydev->speed = SPEED_10000; + } else { + if (priv->speed_set == + AMD_XGBE_PHY_SPEEDSET_1000_10000) + phydev->speed = SPEED_1000; + else + phydev->speed = SPEED_2500; + } phydev->duplex = DUPLEX_FULL; phydev->pause = 0; phydev->asym_pause = 0; @@ -1176,6 +1216,8 @@ static int amd_xgbe_phy_probe(struct phy_device *phydev) struct platform_device *pdev; struct device *dev; char *wq_name; + const __be32 *property; + unsigned int speed_set; int ret; if (!phydev->dev.of_node) @@ -1227,6 +1269,26 @@ static int amd_xgbe_phy_probe(struct phy_device *phydev) goto err_sir0; } + /* Get the device speed set property */ + speed_set = 0; + property = of_get_property(dev->of_node, XGBE_PHY_SPEEDSET_PROPERTY, + NULL); + if (property) + speed_set = be32_to_cpu(*property); + + switch (speed_set) { + case 0: + priv->speed_set = AMD_XGBE_PHY_SPEEDSET_1000_10000; + break; + case 1: + priv->speed_set = AMD_XGBE_PHY_SPEEDSET_2500_10000; + break; + default: + dev_err(dev, "invalid amd,speed-set property\n"); + ret = -EINVAL; + goto err_sir1; + } + priv->link = 1; ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2); -- cgit v1.2.3 From 49193a66f00148443288a83c6ac377a641b10bd5 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Tue, 29 Jul 2014 15:19:59 -0500 Subject: Documentation: networking: phy.txt: Update text for indirect MMD access Update the PHY library documentation to describe how a specific PHY driver can use the PAL MMD register access routines or override those routines with it's own in the event the PHY does not support the IEEE standard for reading and writing MMD phy registers. Signed-off-by: Vince Bridgers Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/phy.txt | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt index 3544c98401fd..e839e7efc835 100644 --- a/Documentation/networking/phy.txt +++ b/Documentation/networking/phy.txt @@ -272,6 +272,8 @@ Writing a PHY driver txtsamp: Requests a transmit timestamp at the PHY level for a 'skb' set_wol: Enable Wake-on-LAN at the PHY level get_wol: Get the Wake-on-LAN status at the PHY level + read_mmd_indirect: Read PHY MMD indirect register + write_mmd_indirect: Write PHY MMD indirect register Of these, only config_aneg and read_status are required to be assigned by the driver code. The rest are optional. Also, it is @@ -284,7 +286,21 @@ Writing a PHY driver Feel free to look at the Marvell, Cicada, and Davicom drivers in drivers/net/phy/ for examples (the lxt and qsemi drivers have - not been tested as of this writing) + not been tested as of this writing). + + The PHY's MMD register accesses are handled by the PAL framework + by default, but can be overridden by a specific PHY driver if + required. This could be the case if a PHY was released for + manufacturing before the MMD PHY register definitions were + standardized by the IEEE. Most modern PHYs will be able to use + the generic PAL framework for accessing the PHY's MMD registers. + An example of such usage is for Energy Efficient Ethernet support, + implemented in the PAL. This support uses the PAL to access MMD + registers for EEE query and configuration if the PHY supports + the IEEE standard access mechanisms, or can use the PHY's specific + access interfaces if overridden by the specific PHY driver. 
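
A driver for a PHY that predates the IEEE indirect-access registers would supply its own accessors through the read_mmd_indirect/write_mmd_indirect hooks listed above. A hedged sketch of the read side only: the hook signature is assumed from the PAL of this era, and the QUIRK_* vendor registers and their access sequence are invented purely for illustration:

    #include <linux/phy.h>

    #define QUIRK_MMD_SEL   0x1d    /* hypothetical vendor registers */
    #define QUIRK_MMD_ADDR  0x1e
    #define QUIRK_MMD_DATA  0x1f

    /* assumed meanings: prtad = register within the MMD,
     * devad = MMD device number, addr = PHY address on the bus */
    static int quirk_read_mmd_indirect(struct phy_device *phydev,
                                       int prtad, int devad, int addr)
    {
            /* vendor-specific sequence replacing the standard
             * MMD control/address/data indirection */
            phy_write(phydev, QUIRK_MMD_SEL, devad);
            phy_write(phydev, QUIRK_MMD_ADDR, prtad);
            return phy_read(phydev, QUIRK_MMD_DATA);
    }

    static struct phy_driver quirk_phy_driver = {
            .name              = "quirk-phy",
            .read_mmd_indirect = quirk_read_mmd_indirect,
            /* config_aneg, read_status, etc. omitted */
    };

Per the text above, leaving the hooks unset keeps the default PAL implementation, so only genuinely non-conforming PHYs need this override.
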
See + the Micrel driver in drivers/net/phy/ for an example of how this + can be implemented. Board Fixups -- cgit v1.2.3 From 94ceaa26320fc48c16ea299ee7b546a0203d95b5 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Thu, 31 Jul 2014 15:49:15 -0500 Subject: dts: Add bindings for multicast hash bins and perfect filter entries This change adds bindings for the number of multicast hash bins and perfect filter entries supported by the Synopsys EMAC. The Synopsys EMAC core is configurable at device creation time, and can be configured for a different number of multicast hash bins and a different number of perfect filter entries. The device does not provide a way to query these parameters, therefore parameters are required. The Altera Cyclone V SOC has support for 256 multicast hash bins and 128 perfect filter entries, and is different than what's currently provided in the stmmac driver. Signed-off-by: Vince Bridgers Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/stmmac.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index a2acd2b26baf..9b03c57563a4 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -25,6 +25,10 @@ Required properties: - snps,force_sf_dma_mode Force DMA to use the Store and Forward mode for both tx and rx. This flag is ignored if force_thresh_dma_mode is set. +- snps,multicast-filter-bins: Number of multicast filter hash bins + supported by this device instance +- snps,perfect-filter-entries: Number of perfect filter entries supported + by this device instance Optional properties: - resets: Should contain a phandle to the STMMAC reset signal, if any @@ -47,6 +51,8 @@ Examples: mac-address = [000000000000]; /* Filled in by U-Boot */ max-frame-size = <3800>; phy-mode = "gmii"; + snps,multicast-filter-bins = <256>; + snps,perfect-filter-entries = <128>; clocks = <&clock>; clock-names = "stmmaceth">; }; -- cgit v1.2.3 From 4df95ff488eb796aab9566652c250330179def17 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:14 -0700 Subject: net: filter: rename sk_chk_filter() -> bpf_check_classic() trivial rename to indicate that this functions performs classic BPF checking Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 2 +- include/linux/filter.h | 2 +- kernel/bpf/core.c | 2 +- kernel/seccomp.c | 4 ++-- net/core/filter.c | 10 +++++----- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index ee78eba78a9d..712068be8171 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -591,7 +591,7 @@ sk_unattached_filter_destroy() for destroying it. The macro SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed code to run the filter. 'filter' is a pointer to struct sk_filter that we got from sk_unattached_filter_create(), and 'ctx' the given context (e.g. -skb pointer). All constraints and restrictions from sk_chk_filter() apply +skb pointer). All constraints and restrictions from bpf_check_classic() apply before a conversion to the new layout is being done behind the scenes! 
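
In practice this checker runs on every classic program handed to the kernel, for example via SO_ATTACH_FILTER. A self-contained userspace sketch that exercises exactly that path with an accept-IPv4-only filter on a packet socket (needs CAP_NET_RAW to run; the filter contents are illustrative):

    #include <stdio.h>
    #include <arpa/inet.h>
    #include <sys/socket.h>
    #include <linux/if_ether.h>
    #include <linux/filter.h>

    int main(void)
    {
            struct sock_filter insns[] = {
                    BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),  /* ethertype */
                    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
                    BPF_STMT(BPF_RET | BPF_K, 0xffff),       /* accept */
                    BPF_STMT(BPF_RET | BPF_K, 0),            /* drop */
            };
            struct sock_fprog prog = {
                    .len    = sizeof(insns) / sizeof(insns[0]),
                    .filter = insns,
            };
            int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

            /* the kernel validates the program with the checker renamed
             * here (sk_chk_filter -> bpf_check_classic) before attaching */
            if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                                     &prog, sizeof(prog)) < 0)
                    perror("attach");
            return 0;
    }
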
Currently, the classic BPF format is being used for JITing on most of the diff --git a/include/linux/filter.h b/include/linux/filter.h index 3769341a745d..c4d0be4c5e75 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -361,7 +361,7 @@ void sk_unattached_filter_destroy(struct sk_filter *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_detach_filter(struct sock *sk); -int sk_chk_filter(const struct sock_filter *filter, unsigned int flen); +int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 265a02cc822d..b479807ec383 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -18,7 +18,7 @@ * 2 of the License, or (at your option) any later version. * * Andi Kleen - Fix a few bad bugs and races. - * Kris Katterjohn - Added many additional checks in sk_chk_filter() + * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ #include #include diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 565743db5384..f4a77d23f209 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -87,7 +87,7 @@ static void populate_seccomp_data(struct seccomp_data *sd) * @filter: filter to verify * @flen: length of filter * - * Takes a previously checked filter (by sk_chk_filter) and + * Takes a previously checked filter (by bpf_check_classic) and * redirects all filter code that loads struct sk_buff data * and related data through seccomp_bpf_load. It also * enforces length and alignment checking of those loads. @@ -239,7 +239,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) goto free_prog; /* Check and rewrite the fprog via the skb checker */ - ret = sk_chk_filter(fp, fprog->len); + ret = bpf_check_classic(fp, fprog->len); if (ret) goto free_prog; diff --git a/net/core/filter.c b/net/core/filter.c index d6cb287e4f59..5740ea08a3ad 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -18,7 +18,7 @@ * 2 of the License, or (at your option) any later version. * * Andi Kleen - Fix a few bad bugs and races. - * Kris Katterjohn - Added many additional checks in sk_chk_filter() + * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ #include @@ -721,7 +721,7 @@ static bool chk_code_allowed(u16 code_to_probe) } /** - * sk_chk_filter - verify socket filter code + * bpf_check_classic - verify socket filter code * @filter: filter to verify * @flen: length of filter * @@ -734,7 +734,7 @@ static bool chk_code_allowed(u16 code_to_probe) * * Returns 0 if the rule set is legal or -EINVAL if not. 
*/ -int sk_chk_filter(const struct sock_filter *filter, unsigned int flen) +int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) { bool anc_found; int pc; @@ -808,7 +808,7 @@ int sk_chk_filter(const struct sock_filter *filter, unsigned int flen) return -EINVAL; } -EXPORT_SYMBOL(sk_chk_filter); +EXPORT_SYMBOL(bpf_check_classic); static int sk_store_orig_filter(struct sk_filter *fp, const struct sock_fprog *fprog) @@ -968,7 +968,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) fp->bpf_func = NULL; fp->jited = 0; - err = sk_chk_filter(fp->insns, fp->len); + err = bpf_check_classic(fp->insns, fp->len); if (err) { __sk_filter_release(fp); return ERR_PTR(err); -- cgit v1.2.3 From 7ae457c1e5b45a1b826fad9d62b32191d2bdcfdb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:16 -0700 Subject: net: filter: split 'struct sk_filter' into socket and bpf parts clean up names related to socket filtering and bpf in the following way: - everything that deals with sockets keeps 'sk_*' prefix - everything that is pure BPF is changed to 'bpf_*' prefix split 'struct sk_filter' into struct sk_filter { atomic_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; and struct bpf_prog { u32 jited:1, len:31; struct sock_fprog_kern *orig_prog; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; struct work_struct work; }; }; so that 'struct bpf_prog' can be used independent of sockets and cleans up 'unattached' bpf use cases split SK_RUN_FILTER macro into: SK_RUN_FILTER to be used with 'struct sk_filter *' and BPF_PROG_RUN to be used with 'struct bpf_prog *' __sk_filter_release(struct sk_filter *) gains __bpf_prog_release(struct bpf_prog *) helper function also perform related renames for the functions that work with 'struct bpf_prog *', since they're on the same lines: sk_filter_size -> bpf_prog_size sk_filter_select_runtime -> bpf_prog_select_runtime sk_filter_free -> bpf_prog_free sk_unattached_filter_create -> bpf_prog_create sk_unattached_filter_destroy -> bpf_prog_destroy sk_store_orig_filter -> bpf_prog_store_orig_filter sk_release_orig_filter -> bpf_release_orig_filter __sk_migrate_filter -> bpf_migrate_filter __sk_prepare_filter -> bpf_prepare_filter API for attaching classic BPF to a socket stays the same: sk_attach_filter(prog, struct sock *)/sk_detach_filter(struct sock *) and SK_RUN_FILTER(struct sk_filter *, ctx) to execute a program which is used by sockets, tun, af_packet API for 'unattached' BPF programs becomes: bpf_prog_create(struct bpf_prog **)/bpf_prog_destroy(struct bpf_prog *) and BPF_PROG_RUN(struct bpf_prog *, ctx) to execute a program which is used by isdn, ppp, team, seccomp, ptp, xt_bpf, cls_bpf, test_bpf Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- Documentation/networking/filter.txt | 10 ++-- arch/arm/net/bpf_jit_32.c | 8 +-- arch/mips/net/bpf_jit.c | 8 +-- arch/powerpc/net/bpf_jit_comp.c | 8 +-- arch/s390/net/bpf_jit_comp.c | 4 +- arch/sparc/net/bpf_jit_comp.c | 4 +- arch/x86/net/bpf_jit_comp.c | 12 ++--- drivers/isdn/i4l/isdn_ppp.c | 26 +++++---- drivers/net/ppp/ppp_generic.c | 28 +++++----- drivers/net/team/team_mode_loadbalance.c | 14 ++--- include/linux/filter.h | 40 ++++++++------ include/linux/isdn_ppp.h | 4 +- include/uapi/linux/netfilter/xt_bpf.h | 4 +- kernel/bpf/core.c | 30 +++++------ kernel/seccomp.c | 10 ++-- lib/test_bpf.c | 24 ++++----- net/core/filter.c | 92 ++++++++++++++++++-------------- net/core/ptp_classifier.c | 6 +-- net/core/sock_diag.c | 2 +- net/netfilter/xt_bpf.c | 6 +-- net/sched/cls_bpf.c | 12 ++--- 21 files changed, 183 insertions(+), 169 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 712068be8171..c48a9704bda8 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -586,11 +586,11 @@ team driver's classifier for its load-balancing mode, netfilter's xt_bpf extension, PTP dissector/classifier, and much more. They are all internally converted by the kernel into the new instruction set representation and run in the eBPF interpreter. For in-kernel handlers, this all works transparently -by using sk_unattached_filter_create() for setting up the filter, resp. -sk_unattached_filter_destroy() for destroying it. The macro -SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed -code to run the filter. 'filter' is a pointer to struct sk_filter that we -got from sk_unattached_filter_create(), and 'ctx' the given context (e.g. +by using bpf_prog_create() for setting up the filter, resp. +bpf_prog_destroy() for destroying it. The macro +BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed +code to run the filter. 'filter' is a pointer to struct bpf_prog that we +got from bpf_prog_create(), and 'ctx' the given context (e.g. skb pointer). All constraints and restrictions from bpf_check_classic() apply before a conversion to the new layout is being done behind the scenes! 
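
For in-kernel users the renamed pieces compose as the message describes; a minimal sketch of the create/run/destroy lifecycle of an 'unattached' program (the one-instruction accept-all filter and the surrounding function are illustrative only, not taken from any of the converted call sites):

    #include <linux/kernel.h>
    #include <linux/filter.h>
    #include <linux/skbuff.h>

    static int demo_unattached_bpf(struct sk_buff *skb)
    {
            struct sock_filter insns[] = {
                    BPF_STMT(BPF_RET | BPF_K, 0xffff),  /* accept all */
            };
            struct sock_fprog_kern fprog = {
                    .len    = ARRAY_SIZE(insns),
                    .filter = insns,
            };
            struct bpf_prog *prog;
            int err;

            /* runs bpf_check_classic(), then JITs or converts to eBPF */
            err = bpf_prog_create(&prog, &fprog);
            if (err)
                    return err;

            err = BPF_PROG_RUN(prog, skb) ? 0 : -EPERM;  /* 0 means drop */

            bpf_prog_destroy(prog);
            return err;
    }

Per-socket filtering keeps the sk_attach_filter()/SK_RUN_FILTER() pair instead, as the commit message above notes; only the socket-independent users move to the bpf_prog_* names.
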
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index fb5503ce016f..a37b989a2f91 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -56,7 +56,7 @@ #define FLAG_NEED_X_RESET (1 << 0) struct jit_ctx { - const struct sk_filter *skf; + const struct bpf_prog *skf; unsigned idx; unsigned prologue_bytes; int ret0_fp_idx; @@ -465,7 +465,7 @@ static inline void update_on_xread(struct jit_ctx *ctx) static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; - const struct sk_filter *prog = ctx->skf; + const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; unsigned i, load_order, off, condt; int imm12; @@ -857,7 +857,7 @@ b_epilogue: } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; unsigned tmp_idx; @@ -926,7 +926,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index b87390a56a2f..05a56619ece2 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -131,7 +131,7 @@ * @target: Memory location for the compiled filter */ struct jit_ctx { - const struct sk_filter *skf; + const struct bpf_prog *skf; unsigned int prologue_bytes; u32 idx; u32 flags; @@ -789,7 +789,7 @@ static int pkt_type_offset(void) static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; - const struct sk_filter *prog = ctx->skf; + const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; unsigned int i, off, load_order, condt; u32 k, b_off __maybe_unused; @@ -1369,7 +1369,7 @@ jmp_cmp: int bpf_jit_enable __read_mostly; -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; unsigned int alloc_size, tmp_idx; @@ -1423,7 +1423,7 @@ out: kfree(ctx.offsets); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 82e82cadcde5..3afa6f4c1957 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -25,7 +25,7 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, +static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx) { int i; @@ -121,7 +121,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) /* Assemble the body code between the prologue & epilogue. 
*/ -static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, +static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, unsigned int *addrs) { @@ -569,7 +569,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, return 0; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int proglen; unsigned int alloclen; @@ -693,7 +693,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a2cbd875543a..61e45b7c04d7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -812,7 +812,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, return header; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; unsigned long size, prg_len, lit_len; @@ -875,7 +875,7 @@ out: kfree(addrs); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 892a102671ad..1f76c22a6a75 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -354,7 +354,7 @@ do { *prog++ = BR_OPC | WDISP22(OFF); \ * emit_jump() calls with adjusted offsets. */ -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int cleanup_addr, proglen, oldproglen = 0; u32 temp[8], *prog, *func, seen = 0, pass; @@ -808,7 +808,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e2ecc1380b3d..5c8cb8043c5a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -211,7 +211,7 @@ struct jit_context { bool seen_ld_abs; }; -static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, +static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { struct bpf_insn *insn = bpf_prog->insnsi; @@ -841,7 +841,7 @@ common_load: ctx->seen_ld_abs = true; /* By design x64 JIT should support all BPF instructions * This error will be seen if new instruction was added * to interpreter, but not to JIT - * or if there is junk in sk_filter + * or if there is junk in bpf_prog */ pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; @@ -862,11 +862,11 @@ common_load: ctx->seen_ld_abs = true; return proglen; } -void bpf_jit_compile(struct sk_filter *prog) +void bpf_jit_compile(struct bpf_prog *prog) { } -void bpf_int_jit_compile(struct sk_filter *prog) +void bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; int proglen, oldproglen = 0; @@ -932,7 +932,7 @@ out: static void bpf_jit_free_deferred(struct work_struct *work) { - struct sk_filter *fp = container_of(work, struct sk_filter, work); + struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; @@ -941,7 +941,7 @@ static void bpf_jit_free_deferred(struct work_struct *work) kfree(fp); } -void bpf_jit_free(struct sk_filter *fp) +void 
bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) { INIT_WORK(&fp->work, bpf_jit_free_deferred); diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 62f0688d45a5..c4198fa490bf 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -379,12 +379,12 @@ isdn_ppp_release(int min, struct file *file) #endif #ifdef CONFIG_IPPP_FILTER if (is->pass_filter) { - sk_unattached_filter_destroy(is->pass_filter); + bpf_prog_destroy(is->pass_filter); is->pass_filter = NULL; } if (is->active_filter) { - sk_unattached_filter_destroy(is->active_filter); + bpf_prog_destroy(is->active_filter); is->active_filter = NULL; } #endif @@ -639,12 +639,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.filter = code; if (is->pass_filter) { - sk_unattached_filter_destroy(is->pass_filter); + bpf_prog_destroy(is->pass_filter); is->pass_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&is->pass_filter, - &fprog); + err = bpf_prog_create(&is->pass_filter, &fprog); else err = 0; kfree(code); @@ -664,12 +663,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.filter = code; if (is->active_filter) { - sk_unattached_filter_destroy(is->active_filter); + bpf_prog_destroy(is->active_filter); is->active_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&is->active_filter, - &fprog); + err = bpf_prog_create(&is->active_filter, &fprog); else err = 0; kfree(code); @@ -1174,14 +1172,14 @@ isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff * } if (is->pass_filter - && SK_RUN_FILTER(is->pass_filter, skb) == 0) { + && BPF_PROG_RUN(is->pass_filter, skb) == 0) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); kfree_skb(skb); return; } if (!(is->active_filter - && SK_RUN_FILTER(is->active_filter, skb) == 0)) { + && BPF_PROG_RUN(is->active_filter, skb) == 0)) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1320,14 +1318,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) } if (ipt->pass_filter - && SK_RUN_FILTER(ipt->pass_filter, skb) == 0) { + && BPF_PROG_RUN(ipt->pass_filter, skb) == 0) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); kfree_skb(skb); goto unlock; } if (!(ipt->active_filter - && SK_RUN_FILTER(ipt->active_filter, skb) == 0)) { + && BPF_PROG_RUN(ipt->active_filter, skb) == 0)) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1517,9 +1515,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp) } drop |= is->pass_filter - && SK_RUN_FILTER(is->pass_filter, skb) == 0; + && BPF_PROG_RUN(is->pass_filter, skb) == 0; drop |= is->active_filter - && SK_RUN_FILTER(is->active_filter, skb) == 0; + && BPF_PROG_RUN(is->active_filter, skb) == 0; skb_push(skb, IPPP_MAX_HEADER - 4); return drop; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 765248b42a0a..fa0d71727894 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -143,8 +143,8 @@ struct ppp { struct sk_buff_head mrq; /* MP: receive reconstruction queue */ #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER - struct sk_filter *pass_filter; /* filter for packets to pass */ - struct sk_filter *active_filter;/* filter for pkts to reset idle */ + struct bpf_prog *pass_filter; /* filter for 
packets to pass */ + struct bpf_prog *active_filter; /* filter for pkts to reset idle */ #endif /* CONFIG_PPP_FILTER */ struct net *ppp_net; /* the net we belong to */ struct ppp_link_stats stats64; /* 64 bit network stats */ @@ -762,12 +762,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ppp_lock(ppp); if (ppp->pass_filter) { - sk_unattached_filter_destroy(ppp->pass_filter); + bpf_prog_destroy(ppp->pass_filter); ppp->pass_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&ppp->pass_filter, - &fprog); + err = bpf_prog_create(&ppp->pass_filter, + &fprog); else err = 0; kfree(code); @@ -788,12 +788,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ppp_lock(ppp); if (ppp->active_filter) { - sk_unattached_filter_destroy(ppp->active_filter); + bpf_prog_destroy(ppp->active_filter); ppp->active_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&ppp->active_filter, - &fprog); + err = bpf_prog_create(&ppp->active_filter, + &fprog); else err = 0; kfree(code); @@ -1205,7 +1205,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) a four-byte PPP header on each packet */ *skb_push(skb, 2) = 1; if (ppp->pass_filter && - SK_RUN_FILTER(ppp->pass_filter, skb) == 0) { + BPF_PROG_RUN(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, "PPP: outbound frame " @@ -1215,7 +1215,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } /* if this packet passes the active filter, record the time */ if (!(ppp->active_filter && - SK_RUN_FILTER(ppp->active_filter, skb) == 0)) + BPF_PROG_RUN(ppp->active_filter, skb) == 0)) ppp->last_xmit = jiffies; skb_pull(skb, 2); #else @@ -1839,7 +1839,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) *skb_push(skb, 2) = 0; if (ppp->pass_filter && - SK_RUN_FILTER(ppp->pass_filter, skb) == 0) { + BPF_PROG_RUN(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, "PPP: inbound frame " @@ -1848,7 +1848,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) return; } if (!(ppp->active_filter && - SK_RUN_FILTER(ppp->active_filter, skb) == 0)) + BPF_PROG_RUN(ppp->active_filter, skb) == 0)) ppp->last_recv = jiffies; __skb_pull(skb, 2); } else @@ -2829,12 +2829,12 @@ static void ppp_destroy_interface(struct ppp *ppp) #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER if (ppp->pass_filter) { - sk_unattached_filter_destroy(ppp->pass_filter); + bpf_prog_destroy(ppp->pass_filter); ppp->pass_filter = NULL; } if (ppp->active_filter) { - sk_unattached_filter_destroy(ppp->active_filter); + bpf_prog_destroy(ppp->active_filter); ppp->active_filter = NULL; } #endif /* CONFIG_PPP_FILTER */ diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index d7be9b36bce6..a1536d0d83a9 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -58,7 +58,7 @@ struct lb_priv_ex { }; struct lb_priv { - struct sk_filter __rcu *fp; + struct bpf_prog __rcu *fp; lb_select_tx_port_func_t __rcu *select_tx_port_func; struct lb_pcpu_stats __percpu *pcpu_stats; struct lb_priv_ex *ex; /* priv extension */ @@ -174,14 +174,14 @@ static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name) static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv, struct sk_buff *skb) { - struct sk_filter *fp; + struct bpf_prog *fp; uint32_t lhash; unsigned char *c; fp = rcu_dereference_bh(lb_priv->fp); 
if (unlikely(!fp)) return 0; - lhash = SK_RUN_FILTER(fp, skb); + lhash = BPF_PROG_RUN(fp, skb); c = (char *) &lhash; return c[0] ^ c[1] ^ c[2] ^ c[3]; } @@ -271,8 +271,8 @@ static void __fprog_destroy(struct sock_fprog_kern *fprog) static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) { struct lb_priv *lb_priv = get_lb_priv(team); - struct sk_filter *fp = NULL; - struct sk_filter *orig_fp = NULL; + struct bpf_prog *fp = NULL; + struct bpf_prog *orig_fp = NULL; struct sock_fprog_kern *fprog = NULL; int err; @@ -281,7 +281,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) ctx->data.bin_val.ptr); if (err) return err; - err = sk_unattached_filter_create(&fp, fprog); + err = bpf_prog_create(&fp, fprog); if (err) { __fprog_destroy(fprog); return err; @@ -300,7 +300,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) if (orig_fp) { synchronize_rcu(); - sk_unattached_filter_destroy(orig_fp); + bpf_prog_destroy(orig_fp); } return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 7cb9b40e9a2f..a5227ab8ccb1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -296,7 +296,8 @@ enum { }) /* Macro to invoke filter function. */ -#define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#define SK_RUN_FILTER(filter, ctx) \ + (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) struct bpf_insn { __u8 code; /* opcode */ @@ -323,12 +324,10 @@ struct sk_buff; struct sock; struct seccomp_data; -struct sk_filter { - atomic_t refcnt; +struct bpf_prog { u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ - struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { @@ -338,25 +337,32 @@ struct sk_filter { }; }; -static inline unsigned int sk_filter_size(unsigned int proglen) +struct sk_filter { + atomic_t refcnt; + struct rcu_head rcu; + struct bpf_prog *prog; +}; + +#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) + +static inline unsigned int bpf_prog_size(unsigned int proglen) { - return max(sizeof(struct sk_filter), - offsetof(struct sk_filter, insns[proglen])); + return max(sizeof(struct bpf_prog), + offsetof(struct bpf_prog, insns[proglen])); } #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) int sk_filter(struct sock *sk, struct sk_buff *skb); -void sk_filter_select_runtime(struct sk_filter *fp); -void sk_filter_free(struct sk_filter *fp); +void bpf_prog_select_runtime(struct bpf_prog *fp); +void bpf_prog_free(struct bpf_prog *fp); int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len); -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog_kern *fprog); -void sk_unattached_filter_destroy(struct sk_filter *fp); +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_detach_filter(struct sock *sk); @@ -369,7 +375,7 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_int_jit_compile(struct sk_filter *fp); +void bpf_int_jit_compile(struct bpf_prog *fp); #define BPF_ANC BIT(15) @@ -423,8 +429,8 @@ static inline void *bpf_load_pointer(const 
struct sk_buff *skb, int k, #include #include -void bpf_jit_compile(struct sk_filter *fp); -void bpf_jit_free(struct sk_filter *fp); +void bpf_jit_compile(struct bpf_prog *fp); +void bpf_jit_free(struct bpf_prog *fp); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) @@ -438,11 +444,11 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, #else #include -static inline void bpf_jit_compile(struct sk_filter *fp) +static inline void bpf_jit_compile(struct bpf_prog *fp) { } -static inline void bpf_jit_free(struct sk_filter *fp) +static inline void bpf_jit_free(struct bpf_prog *fp) { kfree(fp); } diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index 8e10f57f109f..a0070c6dfaf8 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -180,8 +180,8 @@ struct ippp_struct { struct slcompress *slcomp; #endif #ifdef CONFIG_IPPP_FILTER - struct sk_filter *pass_filter; /* filter for packets to pass */ - struct sk_filter *active_filter; /* filter for pkts to reset idle */ + struct bpf_prog *pass_filter; /* filter for packets to pass */ + struct bpf_prog *active_filter; /* filter for pkts to reset idle */ #endif unsigned long debug; struct isdn_ppp_compressor *compressor,*decompressor; diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 2ec9fbcd06f9..1fad2c27ac32 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -6,14 +6,14 @@ #define XT_BPF_MAX_NUM_INSTR 64 -struct sk_filter; +struct bpf_prog; struct xt_bpf_info { __u16 bpf_program_num_elem; struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; /* only used in the kernel */ - struct sk_filter *filter __attribute__((aligned(8))); + struct bpf_prog *filter __attribute__((aligned(8))); }; #endif /*_XT_BPF_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 188ac5ba3900..7f0dbcbb34af 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -73,15 +73,13 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) } /** - * __sk_run_filter - run a filter on a given context - * @ctx: buffer to run the filter on - * @insn: filter to apply + * __bpf_prog_run - run eBPF program on a given context + * @ctx: is the data we are operating on + * @insn: is the array of eBPF instructions * - * Decode and apply filter instructions to the skb->data. Return length to - * keep, 0 for none. @ctx is the data we are operating on, @insn is the - * array of filter instructions. + * Decode and execute eBPF instructions. 
*/ -static unsigned int __sk_run_filter(void *ctx, const struct bpf_insn *insn) +static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; @@ -508,29 +506,29 @@ load_byte: return 0; } -void __weak bpf_int_jit_compile(struct sk_filter *prog) +void __weak bpf_int_jit_compile(struct bpf_prog *prog) { } /** - * sk_filter_select_runtime - select execution runtime for BPF program - * @fp: sk_filter populated with internal BPF program + * bpf_prog_select_runtime - select execution runtime for BPF program + * @fp: bpf_prog populated with internal BPF program * * try to JIT internal BPF program, if JIT is not available select interpreter - * BPF program will be executed via SK_RUN_FILTER() macro + * BPF program will be executed via BPF_PROG_RUN() macro */ -void sk_filter_select_runtime(struct sk_filter *fp) +void bpf_prog_select_runtime(struct bpf_prog *fp) { - fp->bpf_func = (void *) __sk_run_filter; + fp->bpf_func = (void *) __bpf_prog_run; /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); } -EXPORT_SYMBOL_GPL(sk_filter_select_runtime); +EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); /* free internal BPF program */ -void sk_filter_free(struct sk_filter *fp) +void bpf_prog_free(struct bpf_prog *fp) { bpf_jit_free(fp); } -EXPORT_SYMBOL_GPL(sk_filter_free); +EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 33a3a97e2b58..2f3fa2cc2eac 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -54,7 +54,7 @@ struct seccomp_filter { atomic_t usage; struct seccomp_filter *prev; - struct sk_filter *prog; + struct bpf_prog *prog; }; /* Limit any path through the tree to 256KB worth of instructions. */ @@ -187,7 +187,7 @@ static u32 seccomp_run_filters(int syscall) * value always takes priority (ignoring the DATA). 
*/ for (f = current->seccomp.filter; f; f = f->prev) { - u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -260,7 +260,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(sk_filter_size(new_len), + filter->prog = kzalloc(bpf_prog_size(new_len), GFP_KERNEL|__GFP_NOWARN); if (!filter->prog) goto free_filter; @@ -273,7 +273,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) atomic_set(&filter->usage, 1); filter->prog->len = new_len; - sk_filter_select_runtime(filter->prog); + bpf_prog_select_runtime(filter->prog); /* * If there is an existing filter, make it the prev and don't drop its @@ -337,7 +337,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - sk_filter_free(freeme->prog); + bpf_prog_free(freeme->prog); kfree(freeme); } } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 5f48623ee1a7..89e0345733bd 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1761,9 +1761,9 @@ static int probe_filter_length(struct sock_filter *fp) return len + 1; } -static struct sk_filter *generate_filter(int which, int *err) +static struct bpf_prog *generate_filter(int which, int *err) { - struct sk_filter *fp; + struct bpf_prog *fp; struct sock_fprog_kern fprog; unsigned int flen = probe_filter_length(tests[which].u.insns); __u8 test_type = tests[which].aux & TEST_TYPE_MASK; @@ -1773,7 +1773,7 @@ static struct sk_filter *generate_filter(int which, int *err) fprog.filter = tests[which].u.insns; fprog.len = flen; - *err = sk_unattached_filter_create(&fp, &fprog); + *err = bpf_prog_create(&fp, &fprog); if (tests[which].aux & FLAG_EXPECTED_FAIL) { if (*err == -EINVAL) { pr_cont("PASS\n"); @@ -1798,7 +1798,7 @@ static struct sk_filter *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(sk_filter_size(flen), GFP_KERNEL); + fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; @@ -1809,7 +1809,7 @@ static struct sk_filter *generate_filter(int which, int *err) memcpy(fp->insnsi, tests[which].u.insns_int, fp->len * sizeof(struct bpf_insn)); - sk_filter_select_runtime(fp); + bpf_prog_select_runtime(fp); break; } @@ -1817,21 +1817,21 @@ static struct sk_filter *generate_filter(int which, int *err) return fp; } -static void release_filter(struct sk_filter *fp, int which) +static void release_filter(struct bpf_prog *fp, int which) { __u8 test_type = tests[which].aux & TEST_TYPE_MASK; switch (test_type) { case CLASSIC: - sk_unattached_filter_destroy(fp); + bpf_prog_destroy(fp); break; case INTERNAL: - sk_filter_free(fp); + bpf_prog_free(fp); break; } } -static int __run_one(const struct sk_filter *fp, const void *data, +static int __run_one(const struct bpf_prog *fp, const void *data, int runs, u64 *duration) { u64 start, finish; @@ -1840,7 +1840,7 @@ static int __run_one(const struct sk_filter *fp, const void *data, start = ktime_to_us(ktime_get()); for (i = 0; i < runs; i++) - ret = SK_RUN_FILTER(fp, data); + ret = BPF_PROG_RUN(fp, data); finish = ktime_to_us(ktime_get()); @@ -1850,7 +1850,7 @@ static int __run_one(const struct sk_filter *fp, const void *data, return ret; } -static int run_one(const struct sk_filter *fp, struct bpf_test *test) +static int run_one(const struct bpf_prog *fp, struct 
bpf_test *test) { int err_cnt = 0, i, runs = MAX_TESTRUNS; @@ -1884,7 +1884,7 @@ static __init int test_bpf(void) int i, err_cnt = 0, pass_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { - struct sk_filter *fp; + struct bpf_prog *fp; int err; pr_info("#%d %s ", i, tests[i].descr); diff --git a/net/core/filter.c b/net/core/filter.c index 6ac901613bee..d814b8a89d0f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -810,8 +810,8 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) } EXPORT_SYMBOL(bpf_check_classic); -static int sk_store_orig_filter(struct sk_filter *fp, - const struct sock_fprog *fprog) +static int bpf_prog_store_orig_filter(struct bpf_prog *fp, + const struct sock_fprog *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); struct sock_fprog_kern *fkprog; @@ -831,7 +831,7 @@ static int sk_store_orig_filter(struct sk_filter *fp, return 0; } -static void sk_release_orig_filter(struct sk_filter *fp) +static void bpf_release_orig_filter(struct bpf_prog *fp) { struct sock_fprog_kern *fprog = fp->orig_prog; @@ -841,10 +841,16 @@ static void sk_release_orig_filter(struct sk_filter *fp) } } +static void __bpf_prog_release(struct bpf_prog *prog) +{ + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + static void __sk_filter_release(struct sk_filter *fp) { - sk_release_orig_filter(fp); - sk_filter_free(fp); + __bpf_prog_release(fp->prog); + kfree(fp); } /** @@ -872,7 +878,7 @@ static void sk_filter_release(struct sk_filter *fp) void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { - u32 filter_size = sk_filter_size(fp->len); + u32 filter_size = bpf_prog_size(fp->prog->len); atomic_sub(filter_size, &sk->sk_omem_alloc); sk_filter_release(fp); @@ -883,7 +889,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) */ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { - u32 filter_size = sk_filter_size(fp->len); + u32 filter_size = bpf_prog_size(fp->prog->len); /* same check as in sock_kmalloc() */ if (filter_size <= sysctl_optmem_max && @@ -895,10 +901,10 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) return false; } -static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) +static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) { struct sock_filter *old_prog; - struct sk_filter *old_fp; + struct bpf_prog *old_fp; int err, new_len, old_len = fp->len; /* We are free to overwrite insns et al right here as it @@ -927,7 +933,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, sk_filter_size(new_len), GFP_KERNEL); + fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. 
@@ -949,7 +955,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) */ goto out_err_free; - sk_filter_select_runtime(fp); + bpf_prog_select_runtime(fp); kfree(old_prog); return fp; @@ -957,11 +963,11 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) out_err_free: kfree(old_prog); out_err: - __sk_filter_release(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } -static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) { int err; @@ -970,7 +976,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) err = bpf_check_classic(fp->insns, fp->len); if (err) { - __sk_filter_release(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } @@ -983,13 +989,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) * internal BPF translation for the optimized interpreter. */ if (!fp->jited) - fp = __sk_migrate_filter(fp); + fp = bpf_migrate_filter(fp); return fp; } /** - * sk_unattached_filter_create - create an unattached filter + * bpf_prog_create - create an unattached filter * @pfp: the unattached filter that is created * @fprog: the filter program * @@ -998,23 +1004,21 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) * If an error occurs or there is insufficient memory for the filter * a negative errno code is returned. On success the return is zero. */ -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog_kern *fprog) +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); - struct sk_filter *fp; + struct bpf_prog *fp; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); + fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); - atomic_set(&fp->refcnt, 1); fp->len = fprog->len; /* Since unattached filters are not copied back to user * space through sk_get_filter(), we do not need to hold @@ -1022,23 +1026,23 @@ int sk_unattached_filter_create(struct sk_filter **pfp, */ fp->orig_prog = NULL; - /* __sk_prepare_filter() already takes care of freeing + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. 
*/ - fp = __sk_prepare_filter(fp); + fp = bpf_prepare_filter(fp); if (IS_ERR(fp)) return PTR_ERR(fp); *pfp = fp; return 0; } -EXPORT_SYMBOL_GPL(sk_unattached_filter_create); +EXPORT_SYMBOL_GPL(bpf_prog_create); -void sk_unattached_filter_destroy(struct sk_filter *fp) +void bpf_prog_destroy(struct bpf_prog *fp) { - __sk_filter_release(fp); + __bpf_prog_release(fp); } -EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); +EXPORT_SYMBOL_GPL(bpf_prog_destroy); /** * sk_attach_filter - attach a socket filter @@ -1054,7 +1058,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; unsigned int fsize = bpf_classic_proglen(fprog); - unsigned int sk_fsize = sk_filter_size(fprog->len); + unsigned int bpf_fsize = bpf_prog_size(fprog->len); + struct bpf_prog *prog; int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -1064,29 +1069,36 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_fsize, GFP_KERNEL); - if (!fp) + prog = kmalloc(bpf_fsize, GFP_KERNEL); + if (!prog) return -ENOMEM; - if (copy_from_user(fp->insns, fprog->filter, fsize)) { - kfree(fp); + if (copy_from_user(prog->insns, fprog->filter, fsize)) { + kfree(prog); return -EFAULT; } - fp->len = fprog->len; + prog->len = fprog->len; - err = sk_store_orig_filter(fp, fprog); + err = bpf_prog_store_orig_filter(prog, fprog); if (err) { - kfree(fp); + kfree(prog); return -ENOMEM; } - /* __sk_prepare_filter() already takes care of freeing + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp); - if (IS_ERR(fp)) - return PTR_ERR(fp); + prog = bpf_prepare_filter(prog); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) { + __bpf_prog_release(prog); + return -ENOMEM; + } + fp->prog = prog; atomic_set(&fp->refcnt, 0); @@ -1142,7 +1154,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, /* We're copying the filter that has been originally attached, * so no conversion/decode needed anymore. 
*/ - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; ret = fprog->len; if (!len) diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 12ab7b4be609..4eab4a94a59d 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -107,11 +107,11 @@ #include #include -static struct sk_filter *ptp_insns __read_mostly; +static struct bpf_prog *ptp_insns __read_mostly; unsigned int ptp_classify_raw(const struct sk_buff *skb) { - return SK_RUN_FILTER(ptp_insns, skb); + return BPF_PROG_RUN(ptp_insns, skb); } EXPORT_SYMBOL_GPL(ptp_classify_raw); @@ -189,5 +189,5 @@ void __init ptp_classifier_init(void) .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, }; - BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); + BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 57d922320c59..ad704c757bb4 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -68,7 +68,7 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, if (!filter) goto out; - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; flen = bpf_classic_proglen(fprog); attr = nla_reserve(skb, attrtype, flen); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index bbffdbdaf603..dffee9d47ec4 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -28,7 +28,7 @@ static int bpf_mt_check(const struct xt_mtchk_param *par) program.len = info->bpf_program_num_elem; program.filter = info->bpf_program; - if (sk_unattached_filter_create(&info->filter, &program)) { + if (bpf_prog_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; } @@ -40,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; - return SK_RUN_FILTER(info->filter, skb); + return BPF_PROG_RUN(info->filter, skb); } static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; - sk_unattached_filter_destroy(info->filter); + bpf_prog_destroy(info->filter); } static struct xt_match bpf_mt_reg __read_mostly = { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 13f64df2c710..0e30d58149da 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -30,7 +30,7 @@ struct cls_bpf_head { }; struct cls_bpf_prog { - struct sk_filter *filter; + struct bpf_prog *filter; struct sock_filter *bpf_ops; struct tcf_exts exts; struct tcf_result res; @@ -54,7 +54,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, int ret; list_for_each_entry(prog, &head->plist, link) { - int filter_res = SK_RUN_FILTER(prog->filter, skb); + int filter_res = BPF_PROG_RUN(prog->filter, skb); if (filter_res == 0) continue; @@ -92,7 +92,7 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) tcf_unbind_filter(tp, &prog->res); tcf_exts_destroy(tp, &prog->exts); - sk_unattached_filter_destroy(prog->filter); + bpf_prog_destroy(prog->filter); kfree(prog->bpf_ops); kfree(prog); @@ -161,7 +161,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; struct sock_fprog_kern tmp; - struct sk_filter *fp, *fp_old; + struct bpf_prog *fp, *fp_old; u16 bpf_size, bpf_len; u32 classid; int ret; @@ -193,7 +193,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tmp.len = bpf_len; tmp.filter = bpf_ops; - ret = 
sk_unattached_filter_create(&fp, &tmp); + ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; @@ -211,7 +211,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); if (fp_old) - sk_unattached_filter_destroy(fp_old); + bpf_prog_destroy(fp_old); if (bpf_old) kfree(bpf_old); -- cgit v1.2.3 From 0f76b9d83b2b010b63a094024b3cfd82e20af28d Mon Sep 17 00:00:00 2001 From: Hisashi Nakamura Date: Fri, 1 Aug 2014 17:03:00 +0200 Subject: net: sh_eth: Add r8a7794 support Signed-off-by: Hisashi Nakamura [uli: added bindings documentation] Signed-off-by: Ulrich Hecht Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/sh_eth.txt | 1 + drivers/net/ethernet/renesas/sh_eth.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index e7106b50dbdc..34d4db1a4e25 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -9,6 +9,7 @@ Required properties: "renesas,ether-r8a7779" if the device is a part of R8A7779 SoC. "renesas,ether-r8a7790" if the device is a part of R8A7790 SoC. "renesas,ether-r8a7791" if the device is a part of R8A7791 SoC. + "renesas,ether-r8a7794" if the device is a part of R8A7794 SoC. "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC. - reg: offset and length of (1) the E-DMAC/feLic register block (required), (2) the TSU register block (optional). diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 67b11c833870..60e9c2cd051e 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2746,6 +2746,7 @@ static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data }, { .compatible = "renesas,ether-r8a7791", .data = &r8a779x_data }, + { .compatible = "renesas,ether-r8a7794", .data = &r8a779x_data }, { .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data }, { } }; @@ -2972,6 +2973,7 @@ static struct platform_device_id sh_eth_id_table[] = { { "r8a777x-ether", (kernel_ulong_t)&r8a777x_data }, { "r8a7790-ether", (kernel_ulong_t)&r8a779x_data }, { "r8a7791-ether", (kernel_ulong_t)&r8a779x_data }, + { "r8a7794-ether", (kernel_ulong_t)&r8a779x_data }, { } }; MODULE_DEVICE_TABLE(platform, sh_eth_id_table); -- cgit v1.2.3 From 38758f552dc31e7f79671635ee0979b6e5e3bbed Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Fri, 1 Aug 2014 13:27:04 -0700 Subject: i40e: adds FCoE to build and updates its documentation Adds newly added FCoE files to the build but only if FCoE module is configured. Also, updates i40e document for added FCoE support. Signed-off-by: Vasu Dev Tested-by: Jack Morgan Signed-off-by: Aaron Brown Signed-off-by: David S. Miller --- Documentation/networking/i40e.txt | 7 +++++-- drivers/net/ethernet/intel/i40e/Makefile | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/i40e.txt b/Documentation/networking/i40e.txt index f737273c6dc1..a251bf4fe9c9 100644 --- a/Documentation/networking/i40e.txt +++ b/Documentation/networking/i40e.txt @@ -69,8 +69,11 @@ Additional Configurations FCoE ---- - Fiber Channel over Ethernet (FCoE) hardware offload is not currently - supported. 
+ The driver supports Fibre Channel over Ethernet (FCoE) and Data Center + Bridging (DCB) functionality. Configuring DCB and FCoE is outside the scope + of this driver doc. Refer to http://www.open-fcoe.org/ for FCoE project + information, and http://www.open-lldp.org/ or the + e1000-eedc@lists.sourceforge.net mailing list for DCB information. MAC and VLAN anti-spoofing feature ---------------------------------- diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile index d9eb80acac4f..4b94ddb29c24 100644 --- a/drivers/net/ethernet/intel/i40e/Makefile +++ b/drivers/net/ethernet/intel/i40e/Makefile @@ -44,3 +44,4 @@ i40e-objs := i40e_main.o \ i40e_virtchnl_pf.o i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o +i40e-$(CONFIG_FCOE:m=y) += i40e_fcoe.o -- cgit v1.2.3
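
A brief note on the kbuild line in the last hunk, since the construct is easy to misread: $(CONFIG_FCOE:m=y) is a GNU Make substitution reference that rewrites a value of "m" to "y" and leaves "y" (or an unset variable) alone, so i40e_fcoe.o is linked into the i40e object whenever FCoE is configured at all, whether built-in or modular. A rough long-hand equivalent, shown only as an illustration and not part of the patch:

# As added by the patch: "m" is rewritten to "y", "y" stays "y",
# and an unset CONFIG_FCOE expands to nothing.
i40e-$(CONFIG_FCOE:m=y) += i40e_fcoe.o

# Hypothetical equivalent spelling of the same condition:
ifneq ($(filter y m,$(CONFIG_FCOE)),)
i40e-y += i40e_fcoe.o
endif

This matches the commit message's intent that the FCoE objects are built "only if FCoE module is configured", while keeping the i40e Makefile to a single line.
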